1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
 93 /* Return the index of the given mode in the mult and division cost tables.  */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
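/* For illustration: the five-entry mult and divide cost tables below are
   indexed with this macro, so a multiply-cost lookup is typically written as

     ix86_cost->mult_init[MODE_INDEX (mode)]

   where index 4 ("other") covers any remaining mode.  */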
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
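/* Worked example: with COSTS_N_INSNS (N) == (N) * 4, an add costs
   COSTS_N_INSNS (1) == 4 on the speed scale and COSTS_N_BYTES (2) == 4 on
   the size scale, so the two cost scales remain directly comparable.  */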
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
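/* Each stringop_algs initializer gives the algorithm to use when the block
   size is unknown at compile time, followed by {max_size, algorithm, noalign}
   entries for known sizes; a max_size of -1 terminates the list and covers
   all remaining sizes.  Element [0] of each two-element array describes
   32-bit code, element [1] 64-bit code.  A minimal sketch of how such a
   table is scanned, assuming the stringop_algs layout from i386.h (the real
   choice is made by decide_alg later in this file):

     static enum stringop_alg
     pick_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
     {
       int i = 0;
       while (algs->size[i].max != -1 && size > algs->size[i].max)
         i++;
       return algs->size[i].alg;
     }
*/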
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
 179 1, /* cond_not_taken_branch_cost. */
 180 };
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
 255 1, /* cond_not_taken_branch_cost. */
 256 };
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
 332 1, /* cond_not_taken_branch_cost. */
 333 };
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
 407 1, /* cond_not_taken_branch_cost. */
 408 };
 410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 411 (we ensure the alignment).  For small blocks an inline loop is still a
 412 noticeable win; for bigger blocks either rep movsl or rep movsb is the
 413 way to go.  Rep movsb apparently has a more expensive startup time in the
 414 CPU, but after 4K the difference is down in the noise.  */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
 490 1, /* cond_not_taken_branch_cost. */
 491 };
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
 565 1, /* cond_not_taken_branch_cost. */
 566 };
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
 642 1, /* cond_not_taken_branch_cost. */
 643 };
 645 /* For some reason, Athlon deals better with the REP prefix (relative to
 646 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
 647 and 128 bytes for memset.  */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
 719 1, /* cond_not_taken_branch_cost. */
 720 };
 722 /* K8 has optimized REP instructions for medium-sized blocks, but for very
 723 small blocks it is better to use a loop.  For large blocks, libcall can
 724 do nontemporal accesses and beat inline considerably.  */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
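/* Worked example for the 64-bit k8_memcpy entry above: a 10-byte copy is
   expanded as a small inline loop, a 4 KiB copy uses rep movsq
   (rep_prefix_8_byte), and anything above 8 KiB falls back to the memcpy
   libcall, which can use nontemporal stores as noted above.  */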
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
 780 /* New AMD processors never drop prefetches; if they cannot be performed
 781 immediately, they are queued.  We set the number of simultaneous prefetches
 782 to a large constant to reflect this (it is probably not a good idea not
 783 to limit the number of prefetches at all, as their execution also takes
 784 some time).  */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
 806 2, /* cond_not_taken_branch_cost. */
 807 };
 809 /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
 810 very small blocks it is better to use a loop.  For large blocks, libcall can
 811 do nontemporal accesses and beat inline considerably.  */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
 874 /* New AMD processors never drop prefetches; if they cannot be performed
 875 immediately, they are queued.  We set the number of simultaneous prefetches
 876 to a large constant to reflect this (it is probably not a good idea not
 877 to limit the number of prefetches at all, as their execution also takes
 878 some time).  */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
 900 1, /* cond_not_taken_branch_cost. */
 901 };
 903 /* BDVER1 has optimized REP instructions for medium-sized blocks, but for
 904 very small blocks it is better to use a loop.  For large blocks, libcall
 905 can do nontemporal accesses and beat inline considerably.  */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
 969 /* New AMD processors never drop prefetches; if they cannot be performed
 970 immediately, they are queued.  We set the number of simultaneous prefetches
 971 to a large constant to reflect this (it is probably not a good idea not
 972 to limit the number of prefetches at all, as their execution also takes
 973 some time).  */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
 995 1, /* cond_not_taken_branch_cost. */
 996 };
 998 /* BDVER2 has optimized REP instructions for medium-sized blocks, but for
 999 very small blocks it is better to use a loop.  For large blocks, libcall
 1000 can do nontemporal accesses and beat inline considerably.  */
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
 1065 /* New AMD processors never drop prefetches; if they cannot be performed
 1066 immediately, they are queued.  We set the number of simultaneous prefetches
 1067 to a large constant to reflect this (it is probably not a good idea not
 1068 to limit the number of prefetches at all, as their execution also takes
 1069 some time).  */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
 1091 1, /* cond_not_taken_branch_cost. */
 1092 };
 1095 /* BDVER3 has optimized REP instructions for medium-sized blocks, but for
 1096 very small blocks it is better to use a loop.  For large blocks, libcall
 1097 can do nontemporal accesses and beat inline considerably.  */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
 1152 /* New AMD processors never drop prefetches; if they cannot be performed
 1153 immediately, they are queued.  We set the number of simultaneous prefetches
 1154 to a large constant to reflect this (it is probably not a good idea not
 1155 to limit the number of prefetches at all, as their execution also takes
 1156 some time).  */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
 1178 1, /* cond_not_taken_branch_cost. */
 1179 };
 1181 /* BDVER4 has optimized REP instructions for medium-sized blocks, but for
 1182 very small blocks it is better to use a loop.  For large blocks, libcall
 1183 can do nontemporal accesses and beat inline considerably.  */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
 1238 /* New AMD processors never drop prefetches; if they cannot be performed
 1239 immediately, they are queued.  We set the number of simultaneous prefetches
 1240 to a large constant to reflect this (it is probably not a good idea not
 1241 to limit the number of prefetches at all, as their execution also takes
 1242 some time).  */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
 1264 1, /* cond_not_taken_branch_cost. */
 1265 };
 1267 /* BTVER1 has optimized REP instructions for medium-sized blocks, but for
 1268 very small blocks it is better to use a loop.  For large blocks, libcall can
 1269 do nontemporal accesses and beat inline considerably.  */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
 1353 1, /* cond_not_taken_branch_cost. */
 1354 };
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1844 this cost, however, our current implementation of synth_mult results in
1845 the use of unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1931 this cost, however, our current implementation of synth_mult results in
1932 the use of unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME: perhaps a more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
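/* For illustration only -- a hypothetical entry paraphrasing the shape of
   x86-tune.def rather than quoting it:

     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
	       m_386 | m_CORE_ALL | m_K6_GEODE | m_GENERIC)

   Such an entry contributes "use_leave" to ix86_tune_feature_names above
   and its selector mask to initial_ix86_tune_features, so the feature is
   enabled for every -mtune target whose m_* bit is set in the mask.  */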
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
2084 /* In case the average insn count for single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2089 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 /* The "default" register map used in 32bit mode. */
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2148 /* The "default" register map used in 64bit mode. */
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
2217 */
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2232 /* Define parameter passing and return registers. */
2234 static int const x86_64_int_parameter_registers[6] =
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2241 CX_REG, DX_REG, R8_REG, R9_REG
2244 static int const x86_64_int_return_registers[4] =
2246 AX_REG, DX_REG, DI_REG, SI_REG
2249 /* Additional registers that are clobbered by SYSV calls. */
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2259 /* Define the structure for the machine field in struct function. */
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2275 saved static chain if ix86_static_chain_on_stack
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2289 [frame] |
2291 [padding2] | = to_allocate
2292 					<- STACK_POINTER
2293   */
2294 struct ix86_frame
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2347 /* Alignment for incoming stack boundary in bits specified at
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2357 /* Calling abi specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2368 /* Register class used for passing a given 64-bit part of the argument.
2369 These represent classes as documented by the psABI, with the exception
2370 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2371 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (the upper half then contains only padding). */
2375 enum x86_64_reg_class
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2390 #define MAX_CLASSES 8
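/* A rough illustration of how the classes above are used (one reading of
   the psABI, not a definitive table; the SSESF/SSEDF and INTEGERSI
   refinements mentioned in the comment above may replace SSE and INTEGER
   where the narrower modes apply):

     struct { long a; long b; }      -> two INTEGER eightbytes
					(passed in two integer registers)
     struct { double a; double b; }  -> two SSE eightbytes
					(passed in two SSE registers)
     struct { char c[24]; }          -> MEMORY (more than two eightbytes,
					passed on the stack)  */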
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2413 enum ix86_function_specific_strings
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2437 static enum calling_abi ix86_function_abi (const_tree);
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2444 /* Whether -mtune= or -march= were specified */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
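/* An informal gloss on reading the table (not taken from the original
   sources): the "k6" row {"k6", &k6_cost, 32, 7, 32, 7, 32}, for example,
   selects k6_cost as the processor cost table and, following the struct
   ptt field order above, requests 32-byte alignment for loops and jumps
   (skipping at most 7 bytes of padding to reach it) and 32-byte function
   alignment.  */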
2496 static bool
2497 gate_insert_vzeroupper (void)
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2505 int i;
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256-bit registers. The pass
2509 reuses the mode-switching infrastructure by re-running the mode insertion
2510 pass, so disable the entities that have already been processed. */
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2521 namespace {
2523 const pass_data pass_data_insert_vzeroupper =
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2549 }; // class pass_insert_vzeroupper
2551 } // anon namespace
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2556 return new pass_insert_vzeroupper (ctxt);
2559 /* Return true if a red-zone is in use. */
2561 static inline bool
2562 ix86_using_red_zone (void)
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2625 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2628 /* Flag options. */
2629 static struct ix86_target_opts flag_opts[] =
2631 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2632 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2633 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2634 { "-m80387", MASK_80387 },
2635 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2636 { "-malign-double", MASK_ALIGN_DOUBLE },
2637 { "-mcld", MASK_CLD },
2638 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2639 { "-mieee-fp", MASK_IEEE_FP },
2640 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2641 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2642 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2643 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2644 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2645 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2646 { "-mno-red-zone", MASK_NO_RED_ZONE },
2647 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2648 { "-mrecip", MASK_RECIP },
2649 { "-mrtd", MASK_RTD },
2650 { "-msseregparm", MASK_SSEREGPARM },
2651 { "-mstack-arg-probe", MASK_STACK_PROBE },
2652 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2653 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2654 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2655 { "-mvzeroupper", MASK_VZEROUPPER },
2656 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2657 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2658 { "-mprefer-avx128", MASK_PREFER_AVX128},
2661 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2663 char isa_other[40];
2664 char target_other[40];
2665 unsigned num = 0;
2666 unsigned i, j;
2667 char *ret;
2668 char *ptr;
2669 size_t len;
2670 size_t line_len;
2671 size_t sep_len;
2672 const char *abi;
2674 memset (opts, '\0', sizeof (opts));
2676 /* Add -march= option. */
2677 if (arch)
2679 opts[num][0] = "-march=";
2680 opts[num++][1] = arch;
2683 /* Add -mtune= option. */
2684 if (tune)
2686 opts[num][0] = "-mtune=";
2687 opts[num++][1] = tune;
2690 /* Add -m32/-m64/-mx32. */
2691 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2693 if ((isa & OPTION_MASK_ABI_64) != 0)
2694 abi = "-m64";
2695 else
2696 abi = "-mx32";
2697 isa &= ~ (OPTION_MASK_ISA_64BIT
2698 | OPTION_MASK_ABI_64
2699 | OPTION_MASK_ABI_X32);
2701 else
2702 abi = "-m32";
2703 opts[num++][0] = abi;
2705 /* Pick out the options in isa options. */
2706 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2708 if ((isa & isa_opts[i].mask) != 0)
2710 opts[num++][0] = isa_opts[i].option;
2711 isa &= ~ isa_opts[i].mask;
2715 if (isa && add_nl_p)
2717 opts[num++][0] = isa_other;
2718 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2719 isa);
2722 /* Add flag options. */
2723 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2725 if ((flags & flag_opts[i].mask) != 0)
2727 opts[num++][0] = flag_opts[i].option;
2728 flags &= ~ flag_opts[i].mask;
2732 if (flags && add_nl_p)
2734 opts[num++][0] = target_other;
2735 sprintf (target_other, "(other flags: %#x)", flags);
2738 /* Add -fpmath= option. */
2739 if (fpmath)
2741 opts[num][0] = "-mfpmath=";
2742 switch ((int) fpmath)
2744 case FPMATH_387:
2745 opts[num++][1] = "387";
2746 break;
2748 case FPMATH_SSE:
2749 opts[num++][1] = "sse";
2750 break;
2752 case FPMATH_387 | FPMATH_SSE:
2753 opts[num++][1] = "sse+387";
2754 break;
2756 default:
2757 gcc_unreachable ();
2761 /* Any options? */
2762 if (num == 0)
2763 return NULL;
2765 gcc_assert (num < ARRAY_SIZE (opts));
2767 /* Size the string. */
2768 len = 0;
2769 sep_len = (add_nl_p) ? 3 : 1;
2770 for (i = 0; i < num; i++)
2772 len += sep_len;
2773 for (j = 0; j < 2; j++)
2774 if (opts[i][j])
2775 len += strlen (opts[i][j]);
2778 /* Build the string. */
2779 ret = ptr = (char *) xmalloc (len);
2780 line_len = 0;
2782 for (i = 0; i < num; i++)
2784 size_t len2[2];
2786 for (j = 0; j < 2; j++)
2787 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2789 if (i != 0)
2791 *ptr++ = ' ';
2792 line_len++;
2794 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2796 *ptr++ = '\\';
2797 *ptr++ = '\n';
2798 line_len = 0;
2802 for (j = 0; j < 2; j++)
2803 if (opts[i][j])
2805 memcpy (ptr, opts[i][j], len2[j]);
2806 ptr += len2[j];
2807 line_len += len2[j];
2811 *ptr = '\0';
2812 gcc_assert (ret + len >= ptr);
2814 return ret;
2817 /* Return true if profiling code should be emitted before the
2818 prologue, and false otherwise.
2819 Note: for x86 this is the case when -mfentry ("hotfix" support) is used. */
2820 static bool
2821 ix86_profile_before_prologue (void)
2823 return flag_fentry != 0;
2826 /* Function that is callable from the debugger to print the current
2827 options. */
2828 void ATTRIBUTE_UNUSED
2829 ix86_debug_options (void)
2831 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2832 ix86_arch_string, ix86_tune_string,
2833 ix86_fpmath, true);
2835 if (opts)
2837 fprintf (stderr, "%s\n\n", opts);
2838 free (opts);
2840 else
2841 fputs ("<no options>\n\n", stderr);
2843 return;
2846 static const char *stringop_alg_names[] = {
2847 #define DEF_ENUM
2848 #define DEF_ALG(alg, name) #name,
2849 #include "stringop.def"
2850 #undef DEF_ENUM
2851 #undef DEF_ALG
2854 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2855 The string is of the following form (or a comma-separated list of such entries):
2857 strategy_alg:max_size:[align|noalign]
2859 where the full size range for the strategy is either [0, max_size] or
2860 [min_size, max_size], in which min_size is the max_size + 1 of the
2861 preceding range. The last size range must have max_size == -1.
2863 Examples:
2866 -mmemcpy-strategy=libcall:-1:noalign
2868 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2872 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2874 This is to tell the compiler to use the following strategy for memset
2875 1) when the expected size is between [1, 16], use rep_8byte strategy;
2876 2) when the size is between [17, 2048], use vector_loop;
2877 3) when the size is > 2048, use libcall. */
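/* A minimal, self-contained sketch (illustration only, never compiled into
   GCC -- hence the #if 0) of how a single "alg:max_size:align" entry is
   pulled apart, using the same sscanf format string that
   ix86_parse_stringop_strategy_string below relies on.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char alg_name[21];	/* Room for %20[^:] plus the terminating NUL.  */
  char align[11];	/* Room for %10s plus the terminating NUL.  */
  int maxs;
  /* One entry taken from the -mmemset-strategy example above.  */
  const char *range = "vector_loop:2048:align";

  if (sscanf (range, "%20[^:]:%d:%10s", alg_name, &maxs, align) == 3)
    printf ("alg=%s max=%d %s\n", alg_name, maxs, align);
  return 0;
}
#endif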
2879 struct stringop_size_range
2881 int max;
2882 stringop_alg alg;
2883 bool noalign;
2886 static void
2887 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2889 const struct stringop_algs *default_algs;
2890 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2891 char *curr_range_str, *next_range_str;
2892 int i = 0, n = 0;
2894 if (is_memset)
2895 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2896 else
2897 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2899 curr_range_str = strategy_str;
2903 int maxs;
2904 char alg_name[128];
2905 char align[16];
2906 next_range_str = strchr (curr_range_str, ',');
2907 if (next_range_str)
2908 *next_range_str++ = '\0';
2910 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2911 alg_name, &maxs, align))
2913 error ("wrong arg %s to option %s", curr_range_str,
2914 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2915 return;
2918 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2920 error ("size ranges of option %s should be increasing",
2921 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2922 return;
2925 for (i = 0; i < last_alg; i++)
2926 if (!strcmp (alg_name, stringop_alg_names[i]))
2927 break;
2929 if (i == last_alg)
2931 error ("wrong stringop strategy name %s specified for option %s",
2932 alg_name,
2933 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2934 return;
2937 input_ranges[n].max = maxs;
2938 input_ranges[n].alg = (stringop_alg) i;
2939 if (!strcmp (align, "align"))
2940 input_ranges[n].noalign = false;
2941 else if (!strcmp (align, "noalign"))
2942 input_ranges[n].noalign = true;
2943 else
2945 error ("unknown alignment %s specified for option %s",
2946 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2947 return;
2949 n++;
2950 curr_range_str = next_range_str;
2952 while (curr_range_str);
2954 if (input_ranges[n - 1].max != -1)
2956 error ("the max value for the last size range should be -1"
2957 " for option %s",
2958 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2959 return;
2962 if (n > MAX_STRINGOP_ALGS)
2964 error ("too many size ranges specified in option %s",
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2966 return;
2969 /* Now override the default algs array. */
2970 for (i = 0; i < n; i++)
2972 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2973 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2974 = input_ranges[i].alg;
2975 *const_cast<int *>(&default_algs->size[i].noalign)
2976 = input_ranges[i].noalign;
2981 /* Parse the -mtune-ctrl= option. When DUMP is true,
2982 print the features that are explicitly set. */
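/* Usage sketch -- the feature names below are only illustrative; the
   accepted identifiers are the ones listed in x86-tune.def:

     -mtune-ctrl=use_leave,^avx128_optimal

   turns the use_leave tuning feature on and, because of the leading '^',
   turns avx128_optimal off; entries are comma separated, exactly as the
   parser below expects.  */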
2984 static void
2985 parse_mtune_ctrl_str (bool dump)
2987 if (!ix86_tune_ctrl_string)
2988 return;
2990 char *next_feature_string = NULL;
2991 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2992 char *orig = curr_feature_string;
2993 int i;
2996 bool clear = false;
2998 next_feature_string = strchr (curr_feature_string, ',');
2999 if (next_feature_string)
3000 *next_feature_string++ = '\0';
3001 if (*curr_feature_string == '^')
3003 curr_feature_string++;
3004 clear = true;
3006 for (i = 0; i < X86_TUNE_LAST; i++)
3008 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3010 ix86_tune_features[i] = !clear;
3011 if (dump)
3012 fprintf (stderr, "Explicitly %s feature %s\n",
3013 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3014 break;
3017 if (i == X86_TUNE_LAST)
3018 error ("Unknown parameter to option -mtune-ctrl: %s",
3019 clear ? curr_feature_string - 1 : curr_feature_string);
3020 curr_feature_string = next_feature_string;
3022 while (curr_feature_string);
3023 free (orig);
3026 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3027 processor type. */
3029 static void
3030 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3032 unsigned int ix86_tune_mask = 1u << ix86_tune;
3033 int i;
3035 for (i = 0; i < X86_TUNE_LAST; ++i)
3037 if (ix86_tune_no_default)
3038 ix86_tune_features[i] = 0;
3039 else
3040 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3043 if (dump)
3045 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3046 for (i = 0; i < X86_TUNE_LAST; i++)
3047 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3048 ix86_tune_features[i] ? "on" : "off");
3051 parse_mtune_ctrl_str (dump);
3055 /* Override various settings based on options. If MAIN_ARGS_P, the
3056 options are from the command line, otherwise they are from
3057 attributes. */
3059 static void
3060 ix86_option_override_internal (bool main_args_p,
3061 struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3064 int i;
3065 unsigned int ix86_arch_mask;
3066 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3067 const char *prefix;
3068 const char *suffix;
3069 const char *sw;
3071 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3072 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3073 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3074 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3075 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3076 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3077 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3078 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3079 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3080 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3081 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3082 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3083 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3084 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3085 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3086 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3087 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3088 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3089 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3090 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3091 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3092 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3093 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3094 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3095 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3096 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3097 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3098 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3099 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3100 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3101 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3102 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3103 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3104 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3105 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3106 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3107 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3108 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3109 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3110 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3111 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3112 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3113 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3114 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3115 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3116 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3118 #define PTA_CORE2 \
3119 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3120 | PTA_CX16 | PTA_FXSR)
3121 #define PTA_NEHALEM \
3122 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3123 #define PTA_WESTMERE \
3124 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3125 #define PTA_SANDYBRIDGE \
3126 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3127 #define PTA_IVYBRIDGE \
3128 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3129 #define PTA_HASWELL \
3130 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3131 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3132 #define PTA_BROADWELL \
3133 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3134 #define PTA_BONNELL \
3135 (PTA_CORE2 | PTA_MOVBE)
3136 #define PTA_SILVERMONT \
3137 (PTA_WESTMERE | PTA_MOVBE)
3139 /* If this reaches 64, we need to widen the struct pta flags field below. */
3141 static struct pta
3143 const char *const name; /* processor name or nickname. */
3144 const enum processor_type processor;
3145 const enum attr_cpu schedule;
3146 const unsigned HOST_WIDE_INT flags;
3148 const processor_alias_table[] =
3150 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3151 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3152 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3153 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3154 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3155 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3156 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3157 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3158 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3159 PTA_MMX | PTA_SSE | PTA_FXSR},
3160 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3161 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3162 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3163 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_FXSR},
3167 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3173 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3175 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3176 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3177 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3178 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3179 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3180 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3181 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3182 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_SANDYBRIDGE},
3186 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3189 PTA_IVYBRIDGE},
3190 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3191 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3192 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3193 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3194 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3195 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3196 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3197 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3198 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3199 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3200 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3201 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3202 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3203 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3207 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3212 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3213 {"x86-64", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3215 {"k8", PROCESSOR_K8, CPU_K8,
3216 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3217 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3218 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3219 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3220 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3221 {"opteron", PROCESSOR_K8, CPU_K8,
3222 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3223 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3224 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"athlon64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3238 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3239 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3241 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3242 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3245 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3246 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3247 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3248 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3249 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3250 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3251 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3252 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3253 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3254 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3255 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3256 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3257 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3258 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3259 | PTA_XSAVEOPT | PTA_FSGSBASE},
3260 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3262 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3263 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3264 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3265 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3266 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3267 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3270 | PTA_FXSR | PTA_XSAVE},
3271 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3274 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3275 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3276 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3278 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3279 PTA_64BIT
3280 | PTA_HLE /* flags are only used for -march switch. */ },
3283 /* -mrecip options. */
3284 static struct
3286 const char *string; /* option name */
3287 unsigned int mask; /* mask bits to set */
3289 const recip_options[] =
3291 { "all", RECIP_MASK_ALL },
3292 { "none", RECIP_MASK_NONE },
3293 { "div", RECIP_MASK_DIV },
3294 { "sqrt", RECIP_MASK_SQRT },
3295 { "vec-div", RECIP_MASK_VEC_DIV },
3296 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3299 int const pta_size = ARRAY_SIZE (processor_alias_table);
3301 /* Set up prefix/suffix so the error messages refer to either the command
3302 line argument, or the attribute(target). */
3303 if (main_args_p)
3305 prefix = "-m";
3306 suffix = "";
3307 sw = "switch";
3309 else
3311 prefix = "option(\"";
3312 suffix = "\")";
3313 sw = "attribute";
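/* For instance, the "generic CPU can be used only for %stune=%s %s"
   diagnostic below expands to "... for -mtune= switch" when the options
   come from the command line and to "... for option("tune=") attribute"
   when they come from attribute(target).  */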
3316 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3317 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3318 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3319 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3320 #ifdef TARGET_BI_ARCH
3321 else
3323 #if TARGET_BI_ARCH == 1
3324 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3325 is on and OPTION_MASK_ABI_X32 is off. We turn off
3326 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3327 -mx32. */
3328 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3330 #else
3331 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3332 on and OPTION_MASK_ABI_64 is off. We turn off
3333 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3334 -m64. */
3335 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3336 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3337 #endif
3339 #endif
3341 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3343 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3344 OPTION_MASK_ABI_64 for TARGET_X32. */
3345 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3348 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3349 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3350 | OPTION_MASK_ABI_X32
3351 | OPTION_MASK_ABI_64);
3352 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3354 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3355 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3356 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3357 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3360 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3361 SUBTARGET_OVERRIDE_OPTIONS;
3362 #endif
3364 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3365 SUBSUBTARGET_OVERRIDE_OPTIONS;
3366 #endif
3368 /* -fPIC is the default for x86_64. */
3369 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3370 opts->x_flag_pic = 2;
3372 /* Need to check -mtune=generic first. */
3373 if (opts->x_ix86_tune_string)
3375 /* As special support for cross compilers we read -mtune=native
3376 as -mtune=generic. With native compilers we won't see the
3377 -mtune=native, as it was changed by the driver. */
3378 if (!strcmp (opts->x_ix86_tune_string, "native"))
3380 opts->x_ix86_tune_string = "generic";
3382 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3383 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3384 "%stune=k8%s or %stune=generic%s instead as appropriate",
3385 prefix, suffix, prefix, suffix, prefix, suffix);
3387 else
3389 if (opts->x_ix86_arch_string)
3390 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3391 if (!opts->x_ix86_tune_string)
3393 opts->x_ix86_tune_string
3394 = processor_target_table[TARGET_CPU_DEFAULT].name;
3395 ix86_tune_defaulted = 1;
3398 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3399 or defaulted. We need to use a sensible tune option. */
3400 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3402 opts->x_ix86_tune_string = "generic";
3406 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3407 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3409 /* rep; movq isn't available in 32-bit code. */
3410 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3411 opts->x_ix86_stringop_alg = no_stringop;
3414 if (!opts->x_ix86_arch_string)
3415 opts->x_ix86_arch_string
3416 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3417 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3418 else
3419 ix86_arch_specified = 1;
3421 if (opts_set->x_ix86_pmode)
3423 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_SI)
3425 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3426 && opts->x_ix86_pmode == PMODE_DI))
3427 error ("address mode %qs not supported in the %s bit mode",
3428 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3429 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3431 else
3432 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 ? PMODE_DI : PMODE_SI;
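/* For example, on x32 (64-bit ISA with 32-bit pointers) TARGET_LP64_P is
   false, so the default chosen here is PMODE_SI and Pmode ends up being
   SImode.  */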
3435 if (!opts_set->x_ix86_abi)
3436 opts->x_ix86_abi = DEFAULT_ABI;
3438 /* For targets using ms ABI enable ms-extensions, if not
3439 explicitly turned off. For non-ms ABI we turn off this
3440 option. */
3441 if (!opts_set->x_flag_ms_extensions)
3442 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3444 if (opts_set->x_ix86_cmodel)
3446 switch (opts->x_ix86_cmodel)
3448 case CM_SMALL:
3449 case CM_SMALL_PIC:
3450 if (opts->x_flag_pic)
3451 opts->x_ix86_cmodel = CM_SMALL_PIC;
3452 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3453 error ("code model %qs not supported in the %s bit mode",
3454 "small", "32");
3455 break;
3457 case CM_MEDIUM:
3458 case CM_MEDIUM_PIC:
3459 if (opts->x_flag_pic)
3460 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3461 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3462 error ("code model %qs not supported in the %s bit mode",
3463 "medium", "32");
3464 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3465 error ("code model %qs not supported in x32 mode",
3466 "medium");
3467 break;
3469 case CM_LARGE:
3470 case CM_LARGE_PIC:
3471 if (opts->x_flag_pic)
3472 opts->x_ix86_cmodel = CM_LARGE_PIC;
3473 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3474 error ("code model %qs not supported in the %s bit mode",
3475 "large", "32");
3476 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in x32 mode",
3478 "large");
3479 break;
3481 case CM_32:
3482 if (opts->x_flag_pic)
3483 error ("code model %s does not support PIC mode", "32");
3484 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3485 error ("code model %qs not supported in the %s bit mode",
3486 "32", "64");
3487 break;
3489 case CM_KERNEL:
3490 if (opts->x_flag_pic)
3492 error ("code model %s does not support PIC mode", "kernel");
3493 opts->x_ix86_cmodel = CM_32;
3495 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3496 error ("code model %qs not supported in the %s bit mode",
3497 "kernel", "32");
3498 break;
3500 default:
3501 gcc_unreachable ();
3504 else
3506 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3507 use of rip-relative addressing. This eliminates fixups that
3508 would otherwise be needed if this object is to be placed in a
3509 DLL, and is essentially just as efficient as direct addressing. */
3510 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3511 && (TARGET_RDOS || TARGET_PECOFF))
3512 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3513 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3514 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3515 else
3516 opts->x_ix86_cmodel = CM_32;
3518 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3520 error ("-masm=intel not supported in this configuration");
3521 opts->x_ix86_asm_dialect = ASM_ATT;
3523 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3524 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3525 sorry ("%i-bit mode not compiled in",
3526 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3528 for (i = 0; i < pta_size; i++)
3529 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3531 ix86_schedule = processor_alias_table[i].schedule;
3532 ix86_arch = processor_alias_table[i].processor;
3533 /* Default cpu tuning to the architecture. */
3534 ix86_tune = ix86_arch;
3536 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3537 && !(processor_alias_table[i].flags & PTA_64BIT))
3538 error ("CPU you selected does not support x86-64 "
3539 "instruction set");
3541 if (processor_alias_table[i].flags & PTA_MMX
3542 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3543 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3544 if (processor_alias_table[i].flags & PTA_3DNOW
3545 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3546 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3547 if (processor_alias_table[i].flags & PTA_3DNOW_A
3548 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3549 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3550 if (processor_alias_table[i].flags & PTA_SSE
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3553 if (processor_alias_table[i].flags & PTA_SSE2
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3556 if (processor_alias_table[i].flags & PTA_SSE3
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3559 if (processor_alias_table[i].flags & PTA_SSSE3
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3562 if (processor_alias_table[i].flags & PTA_SSE4_1
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3565 if (processor_alias_table[i].flags & PTA_SSE4_2
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3568 if (processor_alias_table[i].flags & PTA_AVX
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3571 if (processor_alias_table[i].flags & PTA_AVX2
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3574 if (processor_alias_table[i].flags & PTA_FMA
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3577 if (processor_alias_table[i].flags & PTA_SSE4A
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3580 if (processor_alias_table[i].flags & PTA_FMA4
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3583 if (processor_alias_table[i].flags & PTA_XOP
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3586 if (processor_alias_table[i].flags & PTA_LWP
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3589 if (processor_alias_table[i].flags & PTA_ABM
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3592 if (processor_alias_table[i].flags & PTA_BMI
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3595 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3598 if (processor_alias_table[i].flags & PTA_TBM
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3601 if (processor_alias_table[i].flags & PTA_BMI2
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3604 if (processor_alias_table[i].flags & PTA_CX16
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3607 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3610 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3611 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3614 if (processor_alias_table[i].flags & PTA_MOVBE
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3617 if (processor_alias_table[i].flags & PTA_AES
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3620 if (processor_alias_table[i].flags & PTA_SHA
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3623 if (processor_alias_table[i].flags & PTA_PCLMUL
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3626 if (processor_alias_table[i].flags & PTA_FSGSBASE
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3629 if (processor_alias_table[i].flags & PTA_RDRND
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3632 if (processor_alias_table[i].flags & PTA_F16C
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3635 if (processor_alias_table[i].flags & PTA_RTM
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3638 if (processor_alias_table[i].flags & PTA_HLE
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3641 if (processor_alias_table[i].flags & PTA_PRFCHW
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3644 if (processor_alias_table[i].flags & PTA_RDSEED
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3647 if (processor_alias_table[i].flags & PTA_ADX
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3650 if (processor_alias_table[i].flags & PTA_FXSR
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3653 if (processor_alias_table[i].flags & PTA_XSAVE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3656 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3659 if (processor_alias_table[i].flags & PTA_AVX512F
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3662 if (processor_alias_table[i].flags & PTA_AVX512ER
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3665 if (processor_alias_table[i].flags & PTA_AVX512PF
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3668 if (processor_alias_table[i].flags & PTA_AVX512CD
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3671 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3674 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3675 x86_prefetch_sse = true;
3677 break;
3680 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3681 error ("generic CPU can be used only for %stune=%s %s",
3682 prefix, suffix, sw);
3683 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3684 error ("intel CPU can be used only for %stune=%s %s",
3685 prefix, suffix, sw);
3686 else if (i == pta_size)
3687 error ("bad value (%s) for %sarch=%s %s",
3688 opts->x_ix86_arch_string, prefix, suffix, sw);
3690 ix86_arch_mask = 1u << ix86_arch;
3691 for (i = 0; i < X86_ARCH_LAST; ++i)
3692 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3694 for (i = 0; i < pta_size; i++)
3695 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3697 ix86_schedule = processor_alias_table[i].schedule;
3698 ix86_tune = processor_alias_table[i].processor;
3699 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3701 if (!(processor_alias_table[i].flags & PTA_64BIT))
3703 if (ix86_tune_defaulted)
3705 opts->x_ix86_tune_string = "x86-64";
3706 for (i = 0; i < pta_size; i++)
3707 if (! strcmp (opts->x_ix86_tune_string,
3708 processor_alias_table[i].name))
3709 break;
3710 ix86_schedule = processor_alias_table[i].schedule;
3711 ix86_tune = processor_alias_table[i].processor;
3713 else
3714 error ("CPU you selected does not support x86-64 "
3715 "instruction set");
3718 /* Intel CPUs have always interpreted SSE prefetch instructions as
3719 NOPs; so, we can enable SSE prefetch instructions even when
3720 -mtune (rather than -march) points us to a processor that has them.
3721 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3722 higher processors. */
3723 if (TARGET_CMOV
3724 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3725 x86_prefetch_sse = true;
3726 break;
3729 if (ix86_tune_specified && i == pta_size)
3730 error ("bad value (%s) for %stune=%s %s",
3731 opts->x_ix86_tune_string, prefix, suffix, sw);
3733 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3735 #ifndef USE_IX86_FRAME_POINTER
3736 #define USE_IX86_FRAME_POINTER 0
3737 #endif
3739 #ifndef USE_X86_64_FRAME_POINTER
3740 #define USE_X86_64_FRAME_POINTER 0
3741 #endif
3743 /* Set the default values for switches whose default depends on TARGET_64BIT
3744 in case they weren't overwritten by command line options. */
3745 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3747 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3748 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3749 if (opts->x_flag_asynchronous_unwind_tables
3750 && !opts_set->x_flag_unwind_tables
3751 && TARGET_64BIT_MS_ABI)
3752 opts->x_flag_unwind_tables = 1;
3753 if (opts->x_flag_asynchronous_unwind_tables == 2)
3754 opts->x_flag_unwind_tables
3755 = opts->x_flag_asynchronous_unwind_tables = 1;
3756 if (opts->x_flag_pcc_struct_return == 2)
3757 opts->x_flag_pcc_struct_return = 0;
3759 else
3761 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3762 opts->x_flag_omit_frame_pointer
3763 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3764 if (opts->x_flag_asynchronous_unwind_tables == 2)
3765 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3766 if (opts->x_flag_pcc_struct_return == 2)
3767 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3770 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3771 if (opts->x_optimize_size)
3772 ix86_cost = &ix86_size_cost;
3773 else
3774 ix86_cost = ix86_tune_cost;
3776 /* Arrange to set up i386_stack_locals for all functions. */
3777 init_machine_status = ix86_init_machine_status;
3779 /* Validate -mregparm= value. */
3780 if (opts_set->x_ix86_regparm)
3782 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3783 warning (0, "-mregparm is ignored in 64-bit mode");
3784 if (opts->x_ix86_regparm > REGPARM_MAX)
3786 error ("-mregparm=%d is not between 0 and %d",
3787 opts->x_ix86_regparm, REGPARM_MAX);
3788 opts->x_ix86_regparm = 0;
3791 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3792 opts->x_ix86_regparm = REGPARM_MAX;
3794 /* Default align_* from the processor table. */
3795 if (opts->x_align_loops == 0)
3797 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3798 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3800 if (opts->x_align_jumps == 0)
3802 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3803 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3805 if (opts->x_align_functions == 0)
3807 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3810 /* Provide default for -mbranch-cost= value. */
3811 if (!opts_set->x_ix86_branch_cost)
3812 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3814 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 opts->x_target_flags
3817 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3819 /* Enable by default the SSE and MMX builtins. Do allow the user to
3820 explicitly disable any of these. In particular, disabling SSE and
3821 MMX for kernel code is extremely useful. */
3822 if (!ix86_arch_specified)
3823 opts->x_ix86_isa_flags
3824 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3825 | TARGET_SUBTARGET64_ISA_DEFAULT)
3826 & ~opts->x_ix86_isa_flags_explicit);
3828 if (TARGET_RTD_P (opts->x_target_flags))
3829 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3831 else
3833 opts->x_target_flags
3834 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3836 if (!ix86_arch_specified)
3837 opts->x_ix86_isa_flags
3838 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3840 /* i386 ABI does not specify red zone. It still makes sense to use it
3841 when the programmer takes care to keep the stack from being destroyed. */
3842 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3843 opts->x_target_flags |= MASK_NO_RED_ZONE;
3846 /* Keep nonleaf frame pointers. */
3847 if (opts->x_flag_omit_frame_pointer)
3848 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3849 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3850 opts->x_flag_omit_frame_pointer = 1;
3852 /* If we're doing fast math, we don't care about comparison order
3853 wrt NaNs. This lets us use a shorter comparison sequence. */
3854 if (opts->x_flag_finite_math_only)
3855 opts->x_target_flags &= ~MASK_IEEE_FP;
3857 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3858 since the insns won't need emulation. */
3859 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3860 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3862 /* Likewise, if the target doesn't have a 387, or we've specified
3863 software floating point, don't use 387 inline intrinsics. */
3864 if (!TARGET_80387_P (opts->x_target_flags))
3865 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3867 /* Turn on MMX builtins for -msse. */
3868 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3869 opts->x_ix86_isa_flags
3870 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3872 /* Enable SSE prefetch. */
3873 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3874 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3875 x86_prefetch_sse = true;
3877 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3878 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3879 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_isa_flags
3881 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3883 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3884 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3885 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3886 opts->x_ix86_isa_flags
3887 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3889 /* Enable lzcnt instruction for -mabm. */
3890 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3891 opts->x_ix86_isa_flags
3892 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3894 /* Validate -mpreferred-stack-boundary= value or default it to
3895 PREFERRED_STACK_BOUNDARY_DEFAULT. */
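/* The argument is the log2 of the requested alignment in bytes; e.g.
   -mpreferred-stack-boundary=4 asks for 1 << 4 = 16-byte (128-bit)
   alignment, as computed below.  */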
3896 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3897 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3899 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3900 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3901 int max = (TARGET_SEH ? 4 : 12);
3903 if (opts->x_ix86_preferred_stack_boundary_arg < min
3904 || opts->x_ix86_preferred_stack_boundary_arg > max)
3906 if (min == max)
3907 error ("-mpreferred-stack-boundary is not supported "
3908 "for this target");
3909 else
3910 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3911 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3913 else
3914 ix86_preferred_stack_boundary
3915 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3918 /* Set the default value for -mstackrealign. */
3919 if (opts->x_ix86_force_align_arg_pointer == -1)
3920 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3922 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3924 /* Validate -mincoming-stack-boundary= value or default it to
3925 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3926 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3927 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3929 if (opts->x_ix86_incoming_stack_boundary_arg
3930 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3931 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3932 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3933 opts->x_ix86_incoming_stack_boundary_arg,
3934 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3935 else
3937 ix86_user_incoming_stack_boundary
3938 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3939 ix86_incoming_stack_boundary
3940 = ix86_user_incoming_stack_boundary;
3944 /* Accept -msseregparm only if at least SSE support is enabled. */
3945 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3946 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3947 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3949 if (opts_set->x_ix86_fpmath)
3951 if (opts->x_ix86_fpmath & FPMATH_SSE)
3953 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3955 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3956 opts->x_ix86_fpmath = FPMATH_387;
3958 else if ((opts->x_ix86_fpmath & FPMATH_387)
3959 && !TARGET_80387_P (opts->x_target_flags))
3961 warning (0, "387 instruction set disabled, using SSE arithmetics");
3962 opts->x_ix86_fpmath = FPMATH_SSE;
3966 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3967 -mfpmath=387. The latter is nevertheless the default on many targets,
3968 since the extra 80-bit precision of temporaries is considered part of
3969 the ABI. Override the default at least for -ffast-math.
3970 TODO: -mfpmath=both seems to produce code of the same performance with
3971 slightly smaller binaries. It is however not clear whether register
3972 allocation is ready for this setting.
3973 Also, -mfpmath=387 codegen is overall noticeably more compact (about
3974 4-5%) than SSE codegen. We may switch to 387 with -ffast-math for
3975 size-optimized functions. */
3976 else if (fast_math_flags_set_p (&global_options)
3977 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3978 opts->x_ix86_fpmath = FPMATH_SSE;
3979 else
3980 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3982 /* If the i387 is disabled, then do not return values in it. */
3983 if (!TARGET_80387_P (opts->x_target_flags))
3984 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3986 /* Use external vectorized library in vectorizing intrinsics. */
3987 if (opts_set->x_ix86_veclibabi_type)
3988 switch (opts->x_ix86_veclibabi_type)
3990 case ix86_veclibabi_type_svml:
3991 ix86_veclib_handler = ix86_veclibabi_svml;
3992 break;
3994 case ix86_veclibabi_type_acml:
3995 ix86_veclib_handler = ix86_veclibabi_acml;
3996 break;
3998 default:
3999 gcc_unreachable ();
4002 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4003 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4004 && !opts->x_optimize_size)
4005 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4007 /* If stack probes are required, the space used for large function
4008 arguments on the stack must also be probed, so enable
4009 -maccumulate-outgoing-args so this happens in the prologue. */
4010 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4011 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4013 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4014 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4015 "for correctness", prefix, suffix);
4016 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4019 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4021 char *p;
4022 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4023 p = strchr (internal_label_prefix, 'X');
4024 internal_label_prefix_len = p - internal_label_prefix;
4025 *p = '\0';
4028 /* When scheduling description is not available, disable scheduler pass
4029 so it won't slow down the compilation and make x87 code slower. */
4030 if (!TARGET_SCHEDULE)
4031 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4033 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4034 ix86_tune_cost->simultaneous_prefetches,
4035 opts->x_param_values,
4036 opts_set->x_param_values);
4037 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4038 ix86_tune_cost->prefetch_block,
4039 opts->x_param_values,
4040 opts_set->x_param_values);
4041 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4042 ix86_tune_cost->l1_cache_size,
4043 opts->x_param_values,
4044 opts_set->x_param_values);
4045 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4046 ix86_tune_cost->l2_cache_size,
4047 opts->x_param_values,
4048 opts_set->x_param_values);
4050 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4051 if (opts->x_flag_prefetch_loop_arrays < 0
4052 && HAVE_prefetch
4053 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4054 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4055 opts->x_flag_prefetch_loop_arrays = 1;
4057 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4058 can be optimized to ap = __builtin_next_arg (0). */
4059 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4060 targetm.expand_builtin_va_start = NULL;
4062 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4064 ix86_gen_leave = gen_leave_rex64;
4065 if (Pmode == DImode)
4067 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4068 ix86_gen_tls_local_dynamic_base_64
4069 = gen_tls_local_dynamic_base_64_di;
4071 else
4073 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4074 ix86_gen_tls_local_dynamic_base_64
4075 = gen_tls_local_dynamic_base_64_si;
4078 else
4079 ix86_gen_leave = gen_leave;
4081 if (Pmode == DImode)
4083 ix86_gen_add3 = gen_adddi3;
4084 ix86_gen_sub3 = gen_subdi3;
4085 ix86_gen_sub3_carry = gen_subdi3_carry;
4086 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4087 ix86_gen_andsp = gen_anddi3;
4088 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4089 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4090 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4091 ix86_gen_monitor = gen_sse3_monitor_di;
4093 else
4095 ix86_gen_add3 = gen_addsi3;
4096 ix86_gen_sub3 = gen_subsi3;
4097 ix86_gen_sub3_carry = gen_subsi3_carry;
4098 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4099 ix86_gen_andsp = gen_andsi3;
4100 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4101 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4102 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4103 ix86_gen_monitor = gen_sse3_monitor_si;
4106 #ifdef USE_IX86_CLD
4107 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4108 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4109 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4110 #endif
4112 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4114 if (opts->x_flag_fentry > 0)
4115 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4116 "with -fpic");
4117 opts->x_flag_fentry = 0;
4119 else if (TARGET_SEH)
4121 if (opts->x_flag_fentry == 0)
4122 sorry ("-mno-fentry isn%'t compatible with SEH");
4123 opts->x_flag_fentry = 1;
4125 else if (opts->x_flag_fentry < 0)
4127 #if defined(PROFILE_BEFORE_PROLOGUE)
4128 opts->x_flag_fentry = 1;
4129 #else
4130 opts->x_flag_fentry = 0;
4131 #endif
4134 /* When not optimizing for size, enable vzeroupper optimization for
4135 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4136 AVX unaligned load/store. */
4137 if (!opts->x_optimize_size)
4139 if (flag_expensive_optimizations
4140 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4141 opts->x_target_flags |= MASK_VZEROUPPER;
4142 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4143 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4144 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4145 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4146 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4147 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4148 /* Enable 128-bit AVX instruction generation
4149 for the auto-vectorizer. */
4150 if (TARGET_AVX128_OPTIMAL
4151 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4152 opts->x_target_flags |= MASK_PREFER_AVX128;
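/* Parse -mrecip=: a comma-separated list of the names in recip_options[]
   above, each optionally prefixed with '!' to disable that approximation;
   e.g. -mrecip=all,!sqrt enables every reciprocal approximation except
   the scalar square root one.  */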
4155 if (opts->x_ix86_recip_name)
4157 char *p = ASTRDUP (opts->x_ix86_recip_name);
4158 char *q;
4159 unsigned int mask, i;
4160 bool invert;
4162 while ((q = strtok (p, ",")) != NULL)
4164 p = NULL;
4165 if (*q == '!')
4167 invert = true;
4168 q++;
4170 else
4171 invert = false;
4173 if (!strcmp (q, "default"))
4174 mask = RECIP_MASK_ALL;
4175 else
4177 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4178 if (!strcmp (q, recip_options[i].string))
4180 mask = recip_options[i].mask;
4181 break;
4184 if (i == ARRAY_SIZE (recip_options))
4186 error ("unknown option for -mrecip=%s", q);
4187 invert = false;
4188 mask = RECIP_MASK_NONE;
4192 opts->x_recip_mask_explicit |= mask;
4193 if (invert)
4194 opts->x_recip_mask &= ~mask;
4195 else
4196 opts->x_recip_mask |= mask;
4200 if (TARGET_RECIP_P (opts->x_target_flags))
4201 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4202 else if (opts_set->x_target_flags & MASK_RECIP)
4203 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4205 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4206 for 64-bit Bionic. */
4207 if (TARGET_HAS_BIONIC
4208 && !(opts_set->x_target_flags
4209 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4210 opts->x_target_flags |= (TARGET_64BIT
4211 ? MASK_LONG_DOUBLE_128
4212 : MASK_LONG_DOUBLE_64);
4214 /* Only one of them can be active. */
4215 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4216 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4218 /* Save the initial options in case the user uses function-specific
4219 options. */
4220 if (main_args_p)
4221 target_option_default_node = target_option_current_node
4222 = build_target_option_node (opts);
4224 /* Handle stack protector */
4225 if (!opts_set->x_ix86_stack_protector_guard)
4226 opts->x_ix86_stack_protector_guard
4227 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4229 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
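/* The strategy string is a comma-separated list of alg:max:align triplets
   handled by ix86_parse_stringop_strategy_string above.  For example,
   assuming "rep_byte" and "libcall" are among stringop_alg_names[],
   -mmemcpy-strategy=rep_byte:4096:align,libcall:-1:noalign selects
   rep_byte for sizes up to 4096 and libcall for everything larger; the
   last range's max must be -1.  */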
4230 if (opts->x_ix86_tune_memcpy_strategy)
4232 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4233 ix86_parse_stringop_strategy_string (str, false);
4234 free (str);
4237 if (opts->x_ix86_tune_memset_strategy)
4239 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4240 ix86_parse_stringop_strategy_string (str, true);
4241 free (str);
4245 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4247 static void
4248 ix86_option_override (void)
4250 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4251 static struct register_pass_info insert_vzeroupper_info
4252 = { pass_insert_vzeroupper, "reload",
4253 1, PASS_POS_INSERT_AFTER
4256 ix86_option_override_internal (true, &global_options, &global_options_set);
4259 /* This needs to be done at start up. It's convenient to do it here. */
4260 register_pass (&insert_vzeroupper_info);
4263 /* Update register usage after having seen the compiler flags. */
4265 static void
4266 ix86_conditional_register_usage (void)
4268 int i, c_mask;
4269 unsigned int j;
4271 /* The PIC register, if it exists, is fixed. */
4272 j = PIC_OFFSET_TABLE_REGNUM;
4273 if (j != INVALID_REGNUM)
4274 fixed_regs[j] = call_used_regs[j] = 1;
4276 /* For 32-bit targets, squash the REX registers. */
4277 if (! TARGET_64BIT)
4279 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4280 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4281 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4282 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4283 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4284 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4287 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4288 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4289 : TARGET_64BIT ? (1 << 2)
4290 : (1 << 1));
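/* An entry greater than 1 in the CALL_USED_REGISTERS initializer encodes
   per-ABI call-usedness as a bit mask tested against c_mask: bit 1 for
   32-bit targets, bit 2 for 64-bit non-MS targets, bit 3 for the 64-bit
   MS ABI.  For example (illustrative), a value of (1 << 2) | (1 << 3)
   marks the register call-used only in the 64-bit ABIs.  */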
4292 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4294 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4296 /* Set/reset conditionally defined registers from
4297 CALL_USED_REGISTERS initializer. */
4298 if (call_used_regs[i] > 1)
4299 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4301 /* Calculate registers of CLOBBERED_REGS register set
4302 as call used registers from GENERAL_REGS register set. */
4303 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4304 && call_used_regs[i])
4305 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4308 /* If MMX is disabled, squash the registers. */
4309 if (! TARGET_MMX)
4310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4311 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4312 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 /* If SSE is disabled, squash the registers. */
4315 if (! TARGET_SSE)
4316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4320 /* If the FPU is disabled, squash the registers. */
4321 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4323 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4324 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 /* If AVX512F is disabled, squash the registers. */
4327 if (! TARGET_AVX512F)
4329 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4330 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4333 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338 /* Save the current options */
4340 static void
4341 ix86_function_specific_save (struct cl_target_option *ptr,
4342 struct gcc_options *opts)
4344 ptr->arch = ix86_arch;
4345 ptr->schedule = ix86_schedule;
4346 ptr->tune = ix86_tune;
4347 ptr->branch_cost = ix86_branch_cost;
4348 ptr->tune_defaulted = ix86_tune_defaulted;
4349 ptr->arch_specified = ix86_arch_specified;
4350 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4351 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4352 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4353 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4354 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4355 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4356 ptr->x_ix86_abi = opts->x_ix86_abi;
4357 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4358 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4359 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4360 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4361 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4362 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4363 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4364 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4365 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4366 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4367 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4368 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4369 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4370 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4371 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4372 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4373 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4374 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4375 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4376 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4378 /* The fields are char but the variables are not; make sure the
4379 values fit in the fields. */
4380 gcc_assert (ptr->arch == ix86_arch);
4381 gcc_assert (ptr->schedule == ix86_schedule);
4382 gcc_assert (ptr->tune == ix86_tune);
4383 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4386 /* Restore the current options */
4388 static void
4389 ix86_function_specific_restore (struct gcc_options *opts,
4390 struct cl_target_option *ptr)
4392 enum processor_type old_tune = ix86_tune;
4393 enum processor_type old_arch = ix86_arch;
4394 unsigned int ix86_arch_mask;
4395 int i;
4397 /* We don't change -fPIC. */
4398 opts->x_flag_pic = flag_pic;
4400 ix86_arch = (enum processor_type) ptr->arch;
4401 ix86_schedule = (enum attr_cpu) ptr->schedule;
4402 ix86_tune = (enum processor_type) ptr->tune;
4403 opts->x_ix86_branch_cost = ptr->branch_cost;
4404 ix86_tune_defaulted = ptr->tune_defaulted;
4405 ix86_arch_specified = ptr->arch_specified;
4406 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4407 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4408 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4409 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4410 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4411 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4412 opts->x_ix86_abi = ptr->x_ix86_abi;
4413 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4414 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4415 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4416 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4417 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4418 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4419 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4420 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4421 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4422 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4423 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4424 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4425 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4426 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4427 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4428 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4429 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4430 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4431 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4432 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4434 /* Recreate the arch feature tests if the arch changed */
4435 if (old_arch != ix86_arch)
4437 ix86_arch_mask = 1u << ix86_arch;
4438 for (i = 0; i < X86_ARCH_LAST; ++i)
4439 ix86_arch_features[i]
4440 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4443 /* Recreate the tune optimization tests */
4444 if (old_tune != ix86_tune)
4445 set_ix86_tune_features (ix86_tune, false);
4448 /* Print the current options */
4450 static void
4451 ix86_function_specific_print (FILE *file, int indent,
4452 struct cl_target_option *ptr)
4454 char *target_string
4455 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4456 NULL, NULL, ptr->x_ix86_fpmath, false);
4458 gcc_assert (ptr->arch < PROCESSOR_max);
4459 fprintf (file, "%*sarch = %d (%s)\n",
4460 indent, "",
4461 ptr->arch, processor_target_table[ptr->arch].name);
4463 gcc_assert (ptr->tune < PROCESSOR_max);
4464 fprintf (file, "%*stune = %d (%s)\n",
4465 indent, "",
4466 ptr->tune, processor_target_table[ptr->tune].name);
4468 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4470 if (target_string)
4472 fprintf (file, "%*s%s\n", indent, "", target_string);
4473 free (target_string);
4478 /* Inner function to process the attribute((target(...))), take an argument and
4479 set the current options from the argument. If we have a list, recursively go
4480 over the list. */
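/* For example (illustrative), __attribute__((target ("avx2,fpmath=sse")))
   arrives here as a single comma-separated string; each piece is matched
   against attrs[] below, "avx2" as an ISA option and "fpmath=sse" as an
   enum option.  */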
4482 static bool
4483 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4484 struct gcc_options *opts,
4485 struct gcc_options *opts_set,
4486 struct gcc_options *enum_opts_set)
4488 char *next_optstr;
4489 bool ret = true;
4491 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4492 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4493 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4494 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4495 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4497 enum ix86_opt_type
4499 ix86_opt_unknown,
4500 ix86_opt_yes,
4501 ix86_opt_no,
4502 ix86_opt_str,
4503 ix86_opt_enum,
4504 ix86_opt_isa
4507 static const struct
4509 const char *string;
4510 size_t len;
4511 enum ix86_opt_type type;
4512 int opt;
4513 int mask;
4514 } attrs[] = {
4515 /* isa options */
4516 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4517 IX86_ATTR_ISA ("abm", OPT_mabm),
4518 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4519 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4520 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4521 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4522 IX86_ATTR_ISA ("aes", OPT_maes),
4523 IX86_ATTR_ISA ("sha", OPT_msha),
4524 IX86_ATTR_ISA ("avx", OPT_mavx),
4525 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4526 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4527 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4528 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4529 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4530 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4531 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4532 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4533 IX86_ATTR_ISA ("sse", OPT_msse),
4534 IX86_ATTR_ISA ("sse2", OPT_msse2),
4535 IX86_ATTR_ISA ("sse3", OPT_msse3),
4536 IX86_ATTR_ISA ("sse4", OPT_msse4),
4537 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4538 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4539 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4540 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4541 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4542 IX86_ATTR_ISA ("fma", OPT_mfma),
4543 IX86_ATTR_ISA ("xop", OPT_mxop),
4544 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4545 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4546 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4547 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4548 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4549 IX86_ATTR_ISA ("hle", OPT_mhle),
4550 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4551 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4552 IX86_ATTR_ISA ("adx", OPT_madx),
4553 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4554 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4555 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4556 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4558 /* enum options */
4559 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4561 /* string options */
4562 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4563 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4565 /* flag options */
4566 IX86_ATTR_YES ("cld",
4567 OPT_mcld,
4568 MASK_CLD),
4570 IX86_ATTR_NO ("fancy-math-387",
4571 OPT_mfancy_math_387,
4572 MASK_NO_FANCY_MATH_387),
4574 IX86_ATTR_YES ("ieee-fp",
4575 OPT_mieee_fp,
4576 MASK_IEEE_FP),
4578 IX86_ATTR_YES ("inline-all-stringops",
4579 OPT_minline_all_stringops,
4580 MASK_INLINE_ALL_STRINGOPS),
4582 IX86_ATTR_YES ("inline-stringops-dynamically",
4583 OPT_minline_stringops_dynamically,
4584 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4586 IX86_ATTR_NO ("align-stringops",
4587 OPT_mno_align_stringops,
4588 MASK_NO_ALIGN_STRINGOPS),
4590 IX86_ATTR_YES ("recip",
4591 OPT_mrecip,
4592 MASK_RECIP),
4596 /* If this is a list, recurse to get the options. */
4597 if (TREE_CODE (args) == TREE_LIST)
4599 bool ret = true;
4601 for (; args; args = TREE_CHAIN (args))
4602 if (TREE_VALUE (args)
4603 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4604 p_strings, opts, opts_set,
4605 enum_opts_set))
4606 ret = false;
4608 return ret;
4611 else if (TREE_CODE (args) != STRING_CST)
4613 error ("attribute %<target%> argument not a string");
4614 return false;
4617 /* Handle multiple arguments separated by commas. */
4618 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4620 while (next_optstr && *next_optstr != '\0')
4622 char *p = next_optstr;
4623 char *orig_p = p;
4624 char *comma = strchr (next_optstr, ',');
4625 const char *opt_string;
4626 size_t len, opt_len;
4627 int opt;
4628 bool opt_set_p;
4629 char ch;
4630 unsigned i;
4631 enum ix86_opt_type type = ix86_opt_unknown;
4632 int mask = 0;
4634 if (comma)
4636 *comma = '\0';
4637 len = comma - next_optstr;
4638 next_optstr = comma + 1;
4640 else
4642 len = strlen (p);
4643 next_optstr = NULL;
4646 /* Recognize no-xxx. */
4647 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4649 opt_set_p = false;
4650 p += 3;
4651 len -= 3;
4653 else
4654 opt_set_p = true;
4656 /* Find the option. */
4657 ch = *p;
4658 opt = N_OPTS;
4659 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4661 type = attrs[i].type;
4662 opt_len = attrs[i].len;
4663 if (ch == attrs[i].string[0]
4664 && ((type != ix86_opt_str && type != ix86_opt_enum)
4665 ? len == opt_len
4666 : len > opt_len)
4667 && memcmp (p, attrs[i].string, opt_len) == 0)
4669 opt = attrs[i].opt;
4670 mask = attrs[i].mask;
4671 opt_string = attrs[i].string;
4672 break;
4676 /* Process the option. */
4677 if (opt == N_OPTS)
4679 error ("attribute(target(\"%s\")) is unknown", orig_p);
4680 ret = false;
4683 else if (type == ix86_opt_isa)
4685 struct cl_decoded_option decoded;
4687 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4688 ix86_handle_option (opts, opts_set,
4689 &decoded, input_location);
4692 else if (type == ix86_opt_yes || type == ix86_opt_no)
4694 if (type == ix86_opt_no)
4695 opt_set_p = !opt_set_p;
4697 if (opt_set_p)
4698 opts->x_target_flags |= mask;
4699 else
4700 opts->x_target_flags &= ~mask;
4703 else if (type == ix86_opt_str)
4705 if (p_strings[opt])
4707 error ("option(\"%s\") was already specified", opt_string);
4708 ret = false;
4710 else
4711 p_strings[opt] = xstrdup (p + opt_len);
4714 else if (type == ix86_opt_enum)
4716 bool arg_ok;
4717 int value;
4719 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4720 if (arg_ok)
4721 set_option (opts, enum_opts_set, opt, value,
4722 p + opt_len, DK_UNSPECIFIED, input_location,
4723 global_dc);
4724 else
4726 error ("attribute(target(\"%s\")) is unknown", orig_p);
4727 ret = false;
4731 else
4732 gcc_unreachable ();
4735 return ret;
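/* Illustration only (the identifiers below are made-up examples): a
   declaration such as
     int foo (void) __attribute__((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")));
   reaches this parser as the single string "avx2,no-sse4a,arch=haswell,fpmath=sse".
   Each comma-separated token is looked up in attrs[]: "avx2" is an ix86_opt_isa
   entry, the "no-" prefix clears the flag instead of setting it, "arch=" is an
   ix86_opt_str entry and "fpmath=" an ix86_opt_enum entry.  */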
4738 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4740 tree
4741 ix86_valid_target_attribute_tree (tree args,
4742 struct gcc_options *opts,
4743 struct gcc_options *opts_set)
4745 const char *orig_arch_string = opts->x_ix86_arch_string;
4746 const char *orig_tune_string = opts->x_ix86_tune_string;
4747 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4748 int orig_tune_defaulted = ix86_tune_defaulted;
4749 int orig_arch_specified = ix86_arch_specified;
4750 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4751 tree t = NULL_TREE;
4752 int i;
4753 struct cl_target_option *def
4754 = TREE_TARGET_OPTION (target_option_default_node);
4755 struct gcc_options enum_opts_set;
4757 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4759 /* Process each of the options on the chain. */
4760 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4761 opts_set, &enum_opts_set))
4762 return error_mark_node;
4764 /* If the changed options are different from the default, rerun
4765 ix86_option_override_internal, and then save the options away.
4766 The string options are attribute options, and will be undone
4767 when we copy the save structure. */
4768 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4769 || opts->x_target_flags != def->x_target_flags
4770 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4771 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4772 || enum_opts_set.x_ix86_fpmath)
4774 /* If we are using the default tune= or arch=, undo the string assigned,
4775 and use the default. */
4776 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4777 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4778 else if (!orig_arch_specified)
4779 opts->x_ix86_arch_string = NULL;
4781 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4782 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4783 else if (orig_tune_defaulted)
4784 opts->x_ix86_tune_string = NULL;
4786 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4787 if (enum_opts_set.x_ix86_fpmath)
4788 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4789 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4790 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4792 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4793 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4796 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4797 ix86_option_override_internal (false, opts, opts_set);
4799 /* Add any builtin functions with the new isa if any. */
4800 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4802 /* Save the current options unless we are validating options for
4803 #pragma. */
4804 t = build_target_option_node (opts);
4806 opts->x_ix86_arch_string = orig_arch_string;
4807 opts->x_ix86_tune_string = orig_tune_string;
4808 opts_set->x_ix86_fpmath = orig_fpmath_set;
4810 /* Free up memory allocated to hold the strings */
4811 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4812 free (option_strings[i]);
4815 return t;
4818 /* Hook to validate attribute((target("string"))). */
4820 static bool
4821 ix86_valid_target_attribute_p (tree fndecl,
4822 tree ARG_UNUSED (name),
4823 tree args,
4824 int ARG_UNUSED (flags))
4826 struct gcc_options func_options;
4827 tree new_target, new_optimize;
4828 bool ret = true;
4830 /* attribute((target("default"))) does nothing, beyond
4831 affecting multi-versioning. */
4832 if (TREE_VALUE (args)
4833 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4834 && TREE_CHAIN (args) == NULL_TREE
4835 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4836 return true;
4838 tree old_optimize = build_optimization_node (&global_options);
4840 /* Get the optimization options of the current function. */
4841 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4843 if (!func_optimize)
4844 func_optimize = old_optimize;
4846 /* Init func_options. */
4847 memset (&func_options, 0, sizeof (func_options));
4848 init_options_struct (&func_options, NULL);
4849 lang_hooks.init_options_struct (&func_options);
4851 cl_optimization_restore (&func_options,
4852 TREE_OPTIMIZATION (func_optimize));
4854 /* Initialize func_options to the default before its target options can
4855 be set. */
4856 cl_target_option_restore (&func_options,
4857 TREE_TARGET_OPTION (target_option_default_node));
4859 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4860 &global_options_set);
4862 new_optimize = build_optimization_node (&func_options);
4864 if (new_target == error_mark_node)
4865 ret = false;
4867 else if (fndecl && new_target)
4869 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4871 if (old_optimize != new_optimize)
4872 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4875 return ret;
4879 /* Hook to determine if one function can safely inline another. */
4881 static bool
4882 ix86_can_inline_p (tree caller, tree callee)
4884 bool ret = false;
4885 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4886 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4888 /* If callee has no option attributes, then it is ok to inline. */
4889 if (!callee_tree)
4890 ret = true;
4892 /* If caller has no option attributes, but callee does then it is not ok to
4893 inline. */
4894 else if (!caller_tree)
4895 ret = false;
4897 else
4899 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4900 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4902 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4903 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4904 function. */
4905 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4906 != callee_opts->x_ix86_isa_flags)
4907 ret = false;
4909 /* See if we have the same non-isa options. */
4910 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4911 ret = false;
4913 /* See if arch, tune, etc. are the same. */
4914 else if (caller_opts->arch != callee_opts->arch)
4915 ret = false;
4917 else if (caller_opts->tune != callee_opts->tune)
4918 ret = false;
4920 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4921 ret = false;
4923 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4924 ret = false;
4926 else
4927 ret = true;
4930 return ret;
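/* For instance, assuming the same command-line baseline, a caller marked
   __attribute__((target ("avx2"))) may inline a callee marked target ("sse4.2"),
   since the callee's ISA flags are a subset of the caller's; inlining in the
   opposite direction is rejected above.  */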
4934 /* Remember the last target of ix86_set_current_function. */
4935 static GTY(()) tree ix86_previous_fndecl;
4937 /* Invalidate ix86_previous_fndecl cache. */
4938 void
4939 ix86_reset_previous_fndecl (void)
4941 ix86_previous_fndecl = NULL_TREE;
4944 /* Establish appropriate back-end context for processing the function
4945 FNDECL. The argument might be NULL to indicate processing at top
4946 level, outside of any function scope. */
4947 static void
4948 ix86_set_current_function (tree fndecl)
4950 /* Only change the context if the function changes. This hook is called
4951 several times in the course of compiling a function, and we don't want to
4952 slow things down too much or call target_reinit when it isn't safe. */
4953 if (fndecl && fndecl != ix86_previous_fndecl)
4955 tree old_tree = (ix86_previous_fndecl
4956 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4957 : NULL_TREE);
4959 tree new_tree = (fndecl
4960 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4961 : NULL_TREE);
4963 ix86_previous_fndecl = fndecl;
4964 if (old_tree == new_tree)
4967 else if (new_tree)
4969 cl_target_option_restore (&global_options,
4970 TREE_TARGET_OPTION (new_tree));
4971 if (TREE_TARGET_GLOBALS (new_tree))
4972 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4973 else
4974 TREE_TARGET_GLOBALS (new_tree)
4975 = save_target_globals_default_opts ();
4978 else if (old_tree)
4980 new_tree = target_option_current_node;
4981 cl_target_option_restore (&global_options,
4982 TREE_TARGET_OPTION (new_tree));
4983 if (TREE_TARGET_GLOBALS (new_tree))
4984 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4985 else if (new_tree == target_option_default_node)
4986 restore_target_globals (&default_target_globals);
4987 else
4988 TREE_TARGET_GLOBALS (new_tree)
4989 = save_target_globals_default_opts ();
4995 /* Return true if this goes in large data/bss. */
4997 static bool
4998 ix86_in_large_data_p (tree exp)
5000 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5001 return false;
5003 /* Functions are never large data. */
5004 if (TREE_CODE (exp) == FUNCTION_DECL)
5005 return false;
5007 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5009 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5010 if (strcmp (section, ".ldata") == 0
5011 || strcmp (section, ".lbss") == 0)
5012 return true;
5013 return false;
5015 else
5017 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5019 /* If this is an incomplete type with size 0, then we can't put it
5020 in data because it might be too big when completed. */
5021 if (!size || size > ix86_section_threshold)
5022 return true;
5025 return false;
5028 /* Switch to the appropriate section for output of DECL.
5029 DECL is either a `VAR_DECL' node or a constant of some sort.
5030 RELOC indicates whether forming the initial value of DECL requires
5031 link-time relocations. */
5033 ATTRIBUTE_UNUSED static section *
5034 x86_64_elf_select_section (tree decl, int reloc,
5035 unsigned HOST_WIDE_INT align)
5037 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5038 && ix86_in_large_data_p (decl))
5040 const char *sname = NULL;
5041 unsigned int flags = SECTION_WRITE;
5042 switch (categorize_decl_for_section (decl, reloc))
5044 case SECCAT_DATA:
5045 sname = ".ldata";
5046 break;
5047 case SECCAT_DATA_REL:
5048 sname = ".ldata.rel";
5049 break;
5050 case SECCAT_DATA_REL_LOCAL:
5051 sname = ".ldata.rel.local";
5052 break;
5053 case SECCAT_DATA_REL_RO:
5054 sname = ".ldata.rel.ro";
5055 break;
5056 case SECCAT_DATA_REL_RO_LOCAL:
5057 sname = ".ldata.rel.ro.local";
5058 break;
5059 case SECCAT_BSS:
5060 sname = ".lbss";
5061 flags |= SECTION_BSS;
5062 break;
5063 case SECCAT_RODATA:
5064 case SECCAT_RODATA_MERGE_STR:
5065 case SECCAT_RODATA_MERGE_STR_INIT:
5066 case SECCAT_RODATA_MERGE_CONST:
5067 sname = ".lrodata";
5068 flags = 0;
5069 break;
5070 case SECCAT_SRODATA:
5071 case SECCAT_SDATA:
5072 case SECCAT_SBSS:
5073 gcc_unreachable ();
5074 case SECCAT_TEXT:
5075 case SECCAT_TDATA:
5076 case SECCAT_TBSS:
5077 /* We don't split these for medium model. Place them into
5078 default sections and hope for the best. */
5079 break;
5081 if (sname)
5083 /* We might get called with string constants, but get_named_section
5084 doesn't like them as they are not DECLs. Also, we need to set
5085 flags in that case. */
5086 if (!DECL_P (decl))
5087 return get_section (sname, flags, NULL);
5088 return get_named_section (decl, sname, reloc);
5091 return default_elf_select_section (decl, reloc, align);
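/* As an illustration: under -mcmodel=medium a writable global larger than
   ix86_section_threshold (-mlarge-data-threshold) is placed in ".ldata"
   instead of ".data", read-only data in ".lrodata" and uninitialized data in
   ".lbss"; smaller objects keep the default ELF sections.  */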
5094 /* Select a set of attributes for section NAME based on the properties
5095 of DECL and whether or not RELOC indicates that DECL's initializer
5096 might contain runtime relocations. */
5098 static unsigned int ATTRIBUTE_UNUSED
5099 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5101 unsigned int flags = default_section_type_flags (decl, name, reloc);
5103 if (decl == NULL_TREE
5104 && (strcmp (name, ".ldata.rel.ro") == 0
5105 || strcmp (name, ".ldata.rel.ro.local") == 0))
5106 flags |= SECTION_RELRO;
5108 if (strcmp (name, ".lbss") == 0
5109 || strncmp (name, ".lbss.", 5) == 0
5110 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5111 flags |= SECTION_BSS;
5113 return flags;
5116 /* Build up a unique section name, expressed as a
5117 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5118 RELOC indicates whether the initial value of EXP requires
5119 link-time relocations. */
5121 static void ATTRIBUTE_UNUSED
5122 x86_64_elf_unique_section (tree decl, int reloc)
5124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5125 && ix86_in_large_data_p (decl))
5127 const char *prefix = NULL;
5128 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5129 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5131 switch (categorize_decl_for_section (decl, reloc))
5133 case SECCAT_DATA:
5134 case SECCAT_DATA_REL:
5135 case SECCAT_DATA_REL_LOCAL:
5136 case SECCAT_DATA_REL_RO:
5137 case SECCAT_DATA_REL_RO_LOCAL:
5138 prefix = one_only ? ".ld" : ".ldata";
5139 break;
5140 case SECCAT_BSS:
5141 prefix = one_only ? ".lb" : ".lbss";
5142 break;
5143 case SECCAT_RODATA:
5144 case SECCAT_RODATA_MERGE_STR:
5145 case SECCAT_RODATA_MERGE_STR_INIT:
5146 case SECCAT_RODATA_MERGE_CONST:
5147 prefix = one_only ? ".lr" : ".lrodata";
5148 break;
5149 case SECCAT_SRODATA:
5150 case SECCAT_SDATA:
5151 case SECCAT_SBSS:
5152 gcc_unreachable ();
5153 case SECCAT_TEXT:
5154 case SECCAT_TDATA:
5155 case SECCAT_TBSS:
5156 /* We don't split these for medium model. Place them into
5157 default sections and hope for the best. */
5158 break;
5160 if (prefix)
5162 const char *name, *linkonce;
5163 char *string;
5165 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5166 name = targetm.strip_name_encoding (name);
5168 /* If we're using one_only, then there needs to be a .gnu.linkonce
5169 prefix to the section name. */
5170 linkonce = one_only ? ".gnu.linkonce" : "";
5172 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5174 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5175 return;
5178 default_unique_section (decl, reloc);
5181 #ifdef COMMON_ASM_OP
5182 /* This says how to output assembler code to declare an
5183 uninitialized external linkage data object.
5185 For medium model x86-64 we need to use .largecomm opcode for
5186 large objects. */
5187 void
5188 x86_elf_aligned_common (FILE *file,
5189 const char *name, unsigned HOST_WIDE_INT size,
5190 int align)
5192 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5193 && size > (unsigned int)ix86_section_threshold)
5194 fputs (".largecomm\t", file);
5195 else
5196 fputs (COMMON_ASM_OP, file);
5197 assemble_name (file, name);
5198 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5199 size, align / BITS_PER_UNIT);
5201 #endif
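/* Sketch of the output above: a common symbol larger than the
   -mlarge-data-threshold limit under -mcmodel=medium is emitted as
   ".largecomm name,size,align" (alignment in bytes); everything else uses
   the usual COMMON_ASM_OP (".comm") directive.  */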
5203 /* Utility function for targets to use in implementing
5204 ASM_OUTPUT_ALIGNED_BSS. */
5206 void
5207 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5208 const char *name, unsigned HOST_WIDE_INT size,
5209 int align)
5211 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5212 && size > (unsigned int)ix86_section_threshold)
5213 switch_to_section (get_named_section (decl, ".lbss", 0));
5214 else
5215 switch_to_section (bss_section);
5216 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5217 #ifdef ASM_DECLARE_OBJECT_NAME
5218 last_assemble_variable_decl = decl;
5219 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5220 #else
5221 /* The standard thing is just to output a label for the object. */
5222 ASM_OUTPUT_LABEL (file, name);
5223 #endif /* ASM_DECLARE_OBJECT_NAME */
5224 ASM_OUTPUT_SKIP (file, size ? size : 1);
5227 /* Decide whether we must probe the stack before any space allocation
5228 on this target. It's essentially TARGET_STACK_PROBE except when
5229 -fstack-check causes the stack to be already probed differently. */
5231 bool
5232 ix86_target_stack_probe (void)
5234 /* Do not probe the stack twice if static stack checking is enabled. */
5235 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5236 return false;
5238 return TARGET_STACK_PROBE;
5241 /* Decide whether we can make a sibling call to a function. DECL is the
5242 declaration of the function being targeted by the call and EXP is the
5243 CALL_EXPR representing the call. */
5245 static bool
5246 ix86_function_ok_for_sibcall (tree decl, tree exp)
5248 tree type, decl_or_type;
5249 rtx a, b;
5251 /* If we are generating position-independent code, we cannot sibcall
5252 optimize any indirect call, or a direct call to a global function,
5253 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5254 if (!TARGET_MACHO
5255 && !TARGET_64BIT
5256 && flag_pic
5257 && (!decl || !targetm.binds_local_p (decl)))
5258 return false;
5260 /* If we need to align the outgoing stack, then sibcalling would
5261 unalign the stack, which may break the called function. */
5262 if (ix86_minimum_incoming_stack_boundary (true)
5263 < PREFERRED_STACK_BOUNDARY)
5264 return false;
5266 if (decl)
5268 decl_or_type = decl;
5269 type = TREE_TYPE (decl);
5271 else
5273 /* We're looking at the CALL_EXPR, we need the type of the function. */
5274 type = CALL_EXPR_FN (exp); /* pointer expression */
5275 type = TREE_TYPE (type); /* pointer type */
5276 type = TREE_TYPE (type); /* function type */
5277 decl_or_type = type;
5280 /* Check that the return value locations are the same. Like
5281 if we are returning floats on the 80387 register stack, we cannot
5282 make a sibcall from a function that doesn't return a float to a
5283 function that does or, conversely, from a function that does return
5284 a float to a function that doesn't; the necessary stack adjustment
5285 would not be executed. This is also the place we notice
5286 differences in the return value ABI. Note that it is ok for one
5287 of the functions to have void return type as long as the return
5288 value of the other is passed in a register. */
5289 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5290 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5291 cfun->decl, false);
5292 if (STACK_REG_P (a) || STACK_REG_P (b))
5294 if (!rtx_equal_p (a, b))
5295 return false;
5297 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5299 else if (!rtx_equal_p (a, b))
5300 return false;
5302 if (TARGET_64BIT)
5304 /* The SYSV ABI has more call-clobbered registers;
5305 disallow sibcalls from MS to SYSV. */
5306 if (cfun->machine->call_abi == MS_ABI
5307 && ix86_function_type_abi (type) == SYSV_ABI)
5308 return false;
5310 else
5312 /* If this call is indirect, we'll need to be able to use a
5313 call-clobbered register for the address of the target function.
5314 Make sure that all such registers are not used for passing
5315 parameters. Note that DLLIMPORT functions are indirect. */
5316 if (!decl
5317 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5319 if (ix86_function_regparm (type, NULL) >= 3)
5321 /* ??? Need to count the actual number of registers to be used,
5322 not the possible number of registers. Fix later. */
5323 return false;
5328 /* Otherwise okay. That also includes certain types of indirect calls. */
5329 return true;
5332 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5333 and "sseregparm" calling convention attributes;
5334 arguments as in struct attribute_spec.handler. */
5336 static tree
5337 ix86_handle_cconv_attribute (tree *node, tree name,
5338 tree args,
5339 int flags ATTRIBUTE_UNUSED,
5340 bool *no_add_attrs)
5342 if (TREE_CODE (*node) != FUNCTION_TYPE
5343 && TREE_CODE (*node) != METHOD_TYPE
5344 && TREE_CODE (*node) != FIELD_DECL
5345 && TREE_CODE (*node) != TYPE_DECL)
5347 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5348 name);
5349 *no_add_attrs = true;
5350 return NULL_TREE;
5353 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5354 if (is_attribute_p ("regparm", name))
5356 tree cst;
5358 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5360 error ("fastcall and regparm attributes are not compatible");
5363 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5365 error ("regparam and thiscall attributes are not compatible");
5368 cst = TREE_VALUE (args);
5369 if (TREE_CODE (cst) != INTEGER_CST)
5371 warning (OPT_Wattributes,
5372 "%qE attribute requires an integer constant argument",
5373 name);
5374 *no_add_attrs = true;
5376 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5378 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5379 name, REGPARM_MAX);
5380 *no_add_attrs = true;
5383 return NULL_TREE;
5386 if (TARGET_64BIT)
5388 /* Do not warn when emulating the MS ABI. */
5389 if ((TREE_CODE (*node) != FUNCTION_TYPE
5390 && TREE_CODE (*node) != METHOD_TYPE)
5391 || ix86_function_type_abi (*node) != MS_ABI)
5392 warning (OPT_Wattributes, "%qE attribute ignored",
5393 name);
5394 *no_add_attrs = true;
5395 return NULL_TREE;
5398 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5399 if (is_attribute_p ("fastcall", name))
5401 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5403 error ("fastcall and cdecl attributes are not compatible");
5405 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5407 error ("fastcall and stdcall attributes are not compatible");
5409 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5411 error ("fastcall and regparm attributes are not compatible");
5413 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5415 error ("fastcall and thiscall attributes are not compatible");
5419 /* Can combine stdcall with fastcall (redundant), regparm and
5420 sseregparm. */
5421 else if (is_attribute_p ("stdcall", name))
5423 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5425 error ("stdcall and cdecl attributes are not compatible");
5427 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5429 error ("stdcall and fastcall attributes are not compatible");
5431 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5433 error ("stdcall and thiscall attributes are not compatible");
5437 /* Can combine cdecl with regparm and sseregparm. */
5438 else if (is_attribute_p ("cdecl", name))
5440 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5442 error ("stdcall and cdecl attributes are not compatible");
5444 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5446 error ("fastcall and cdecl attributes are not compatible");
5448 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5450 error ("cdecl and thiscall attributes are not compatible");
5453 else if (is_attribute_p ("thiscall", name))
5455 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5456 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5457 name);
5458 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5460 error ("stdcall and thiscall attributes are not compatible");
5462 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5464 error ("fastcall and thiscall attributes are not compatible");
5466 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5468 error ("cdecl and thiscall attributes are not compatible");
5472 /* Can combine sseregparm with all attributes. */
5474 return NULL_TREE;
5477 /* The transactional memory builtins are implicitly regparm or fastcall
5478 depending on the ABI. Override the generic do-nothing attribute that
5479 these builtins were declared with, and replace it with one of the two
5480 attributes that we expect elsewhere. */
5482 static tree
5483 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5484 tree args ATTRIBUTE_UNUSED,
5485 int flags, bool *no_add_attrs)
5487 tree alt;
5489 /* In no case do we want to add the placeholder attribute. */
5490 *no_add_attrs = true;
5492 /* The 64-bit ABI is unchanged for transactional memory. */
5493 if (TARGET_64BIT)
5494 return NULL_TREE;
5496 /* ??? Is there a better way to validate 32-bit windows? We have
5497 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5498 if (CHECK_STACK_LIMIT > 0)
5499 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5500 else
5502 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5503 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5505 decl_attributes (node, alt, flags);
5507 return NULL_TREE;
5510 /* This function determines from TYPE the calling-convention. */
5512 unsigned int
5513 ix86_get_callcvt (const_tree type)
5515 unsigned int ret = 0;
5516 bool is_stdarg;
5517 tree attrs;
5519 if (TARGET_64BIT)
5520 return IX86_CALLCVT_CDECL;
5522 attrs = TYPE_ATTRIBUTES (type);
5523 if (attrs != NULL_TREE)
5525 if (lookup_attribute ("cdecl", attrs))
5526 ret |= IX86_CALLCVT_CDECL;
5527 else if (lookup_attribute ("stdcall", attrs))
5528 ret |= IX86_CALLCVT_STDCALL;
5529 else if (lookup_attribute ("fastcall", attrs))
5530 ret |= IX86_CALLCVT_FASTCALL;
5531 else if (lookup_attribute ("thiscall", attrs))
5532 ret |= IX86_CALLCVT_THISCALL;
5534 /* Regparm isn't allowed for thiscall and fastcall. */
5535 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5537 if (lookup_attribute ("regparm", attrs))
5538 ret |= IX86_CALLCVT_REGPARM;
5539 if (lookup_attribute ("sseregparm", attrs))
5540 ret |= IX86_CALLCVT_SSEREGPARM;
5543 if (IX86_BASE_CALLCVT(ret) != 0)
5544 return ret;
5547 is_stdarg = stdarg_p (type);
5548 if (TARGET_RTD && !is_stdarg)
5549 return IX86_CALLCVT_STDCALL | ret;
5551 if (ret != 0
5552 || is_stdarg
5553 || TREE_CODE (type) != METHOD_TYPE
5554 || ix86_function_type_abi (type) != MS_ABI)
5555 return IX86_CALLCVT_CDECL | ret;
5557 return IX86_CALLCVT_THISCALL;
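/* For example, in 32-bit code "void f (int) __attribute__((stdcall));" yields
   IX86_CALLCVT_STDCALL; a non-variadic C++ member function under the MS ABI
   with no explicit attribute defaults to IX86_CALLCVT_THISCALL; otherwise,
   without -mrtd, the result is plain IX86_CALLCVT_CDECL.  */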
5560 /* Return 0 if the attributes for two types are incompatible, 1 if they
5561 are compatible, and 2 if they are nearly compatible (which causes a
5562 warning to be generated). */
5564 static int
5565 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5567 unsigned int ccvt1, ccvt2;
5569 if (TREE_CODE (type1) != FUNCTION_TYPE
5570 && TREE_CODE (type1) != METHOD_TYPE)
5571 return 1;
5573 ccvt1 = ix86_get_callcvt (type1);
5574 ccvt2 = ix86_get_callcvt (type2);
5575 if (ccvt1 != ccvt2)
5576 return 0;
5577 if (ix86_function_regparm (type1, NULL)
5578 != ix86_function_regparm (type2, NULL))
5579 return 0;
5581 return 1;
5584 /* Return the regparm value for a function with the indicated TYPE and DECL.
5585 DECL may be NULL when calling function indirectly
5586 or considering a libcall. */
5588 static int
5589 ix86_function_regparm (const_tree type, const_tree decl)
5591 tree attr;
5592 int regparm;
5593 unsigned int ccvt;
5595 if (TARGET_64BIT)
5596 return (ix86_function_type_abi (type) == SYSV_ABI
5597 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5598 ccvt = ix86_get_callcvt (type);
5599 regparm = ix86_regparm;
5601 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5603 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5604 if (attr)
5606 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5607 return regparm;
5610 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5611 return 2;
5612 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5613 return 1;
5615 /* Use register calling convention for local functions when possible. */
5616 if (decl
5617 && TREE_CODE (decl) == FUNCTION_DECL
5618 /* Caller and callee must agree on the calling convention, so
5619 checking just `optimize' here would mean that with
5620 __attribute__((optimize (...))) the caller could use the regparm convention
5621 and the callee not, or vice versa. Instead look at whether the callee
5622 is optimized or not. */
5623 && opt_for_fn (decl, optimize)
5624 && !(profile_flag && !flag_fentry))
5626 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5627 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5628 if (i && i->local && i->can_change_signature)
5630 int local_regparm, globals = 0, regno;
5632 /* Make sure no regparm register is taken by a
5633 fixed register variable. */
5634 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5635 if (fixed_regs[local_regparm])
5636 break;
5638 /* We don't want to use regparm(3) for nested functions as
5639 these use a static chain pointer in the third argument. */
5640 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5641 local_regparm = 2;
5643 /* In 32-bit mode save a register for the split stack. */
5644 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5645 local_regparm = 2;
5647 /* Each fixed register usage increases register pressure,
5648 so fewer registers should be used for argument passing.
5649 This functionality can be overridden by an explicit
5650 regparm value. */
5651 for (regno = AX_REG; regno <= DI_REG; regno++)
5652 if (fixed_regs[regno])
5653 globals++;
5655 local_regparm
5656 = globals < local_regparm ? local_regparm - globals : 0;
5658 if (local_regparm > regparm)
5659 regparm = local_regparm;
5663 return regparm;
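/* For instance, "int f (int, int, int) __attribute__((regparm (3)));" passes
   its three arguments in %eax, %edx and %ecx instead of on the stack; the
   local-function heuristic above may settle on a smaller count when fixed
   registers shrink the available pool.  */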
5666 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5667 DFmode (2) arguments in SSE registers for a function with the
5668 indicated TYPE and DECL. DECL may be NULL when calling function
5669 indirectly or considering a libcall. Otherwise return 0. */
5671 static int
5672 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5674 gcc_assert (!TARGET_64BIT);
5676 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5677 by the sseregparm attribute. */
5678 if (TARGET_SSEREGPARM
5679 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5681 if (!TARGET_SSE)
5683 if (warn)
5685 if (decl)
5686 error ("calling %qD with attribute sseregparm without "
5687 "SSE/SSE2 enabled", decl);
5688 else
5689 error ("calling %qT with attribute sseregparm without "
5690 "SSE/SSE2 enabled", type);
5692 return 0;
5695 return 2;
5698 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5699 (and DFmode for SSE2) arguments in SSE registers. */
5700 if (decl && TARGET_SSE_MATH && optimize
5701 && !(profile_flag && !flag_fentry))
5703 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5704 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5705 if (i && i->local && i->can_change_signature)
5706 return TARGET_SSE2 ? 2 : 1;
5709 return 0;
5712 /* Return true if EAX is live at the start of the function. Used by
5713 ix86_expand_prologue to determine if we need special help before
5714 calling allocate_stack_worker. */
5716 static bool
5717 ix86_eax_live_at_start_p (void)
5719 /* Cheat. Don't bother working forward from ix86_function_regparm
5720 to the function type to whether an actual argument is located in
5721 eax. Instead just look at cfg info, which is still close enough
5722 to correct at this point. This gives false positives for broken
5723 functions that might use uninitialized data that happens to be
5724 allocated in eax, but who cares? */
5725 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5728 static bool
5729 ix86_keep_aggregate_return_pointer (tree fntype)
5731 tree attr;
5733 if (!TARGET_64BIT)
5735 attr = lookup_attribute ("callee_pop_aggregate_return",
5736 TYPE_ATTRIBUTES (fntype));
5737 if (attr)
5738 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5740 /* For 32-bit MS-ABI the default is to keep aggregate
5741 return pointer. */
5742 if (ix86_function_type_abi (fntype) == MS_ABI)
5743 return true;
5745 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5748 /* Value is the number of bytes of arguments automatically
5749 popped when returning from a subroutine call.
5750 FUNDECL is the declaration node of the function (as a tree),
5751 FUNTYPE is the data type of the function (as a tree),
5752 or for a library call it is an identifier node for the subroutine name.
5753 SIZE is the number of bytes of arguments passed on the stack.
5755 On the 80386, the RTD insn may be used to pop them if the number
5756 of args is fixed, but if the number is variable then the caller
5757 must pop them all. RTD can't be used for library calls now
5758 because the library is compiled with the Unix compiler.
5759 Use of RTD is a selectable option, since it is incompatible with
5760 standard Unix calling sequences. If the option is not selected,
5761 the caller must always pop the args.
5763 The attribute stdcall is equivalent to RTD on a per module basis. */
5765 static int
5766 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5768 unsigned int ccvt;
5770 /* None of the 64-bit ABIs pop arguments. */
5771 if (TARGET_64BIT)
5772 return 0;
5774 ccvt = ix86_get_callcvt (funtype);
5776 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5777 | IX86_CALLCVT_THISCALL)) != 0
5778 && ! stdarg_p (funtype))
5779 return size;
5781 /* Lose any fake structure return argument if it is passed on the stack. */
5782 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5783 && !ix86_keep_aggregate_return_pointer (funtype))
5785 int nregs = ix86_function_regparm (funtype, fundecl);
5786 if (nregs == 0)
5787 return GET_MODE_SIZE (Pmode);
5790 return 0;
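/* Example: a 32-bit stdcall function "void f (int, int)" returns with
   "ret $8", i.e. the callee pops its 8 bytes of stack arguments (the size
   returned here), while a cdecl function returns 0 and the caller adjusts
   the stack itself.  */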
5793 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5795 static bool
5796 ix86_legitimate_combined_insn (rtx insn)
5798 /* Check operand constraints in case hard registers were propagated
5799 into insn pattern. This check prevents combine pass from
5800 generating insn patterns with invalid hard register operands.
5801 These invalid insns can eventually confuse reload to error out
5802 with a spill failure. See also PRs 46829 and 46843. */
5803 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5805 int i;
5807 extract_insn (insn);
5808 preprocess_constraints ();
5810 for (i = 0; i < recog_data.n_operands; i++)
5812 rtx op = recog_data.operand[i];
5813 enum machine_mode mode = GET_MODE (op);
5814 struct operand_alternative *op_alt;
5815 int offset = 0;
5816 bool win;
5817 int j;
5819 /* For pre-AVX disallow unaligned loads/stores where the
5820 instructions don't support it. */
5821 if (!TARGET_AVX
5822 && VECTOR_MODE_P (GET_MODE (op))
5823 && misaligned_operand (op, GET_MODE (op)))
5825 int min_align = get_attr_ssememalign (insn);
5826 if (min_align == 0)
5827 return false;
5830 /* A unary operator may be accepted by the predicate, but it
5831 is irrelevant for matching constraints. */
5832 if (UNARY_P (op))
5833 op = XEXP (op, 0);
5835 if (GET_CODE (op) == SUBREG)
5837 if (REG_P (SUBREG_REG (op))
5838 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5839 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5840 GET_MODE (SUBREG_REG (op)),
5841 SUBREG_BYTE (op),
5842 GET_MODE (op));
5843 op = SUBREG_REG (op);
5846 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5847 continue;
5849 op_alt = recog_op_alt[i];
5851 /* Operand has no constraints, anything is OK. */
5852 win = !recog_data.n_alternatives;
5854 for (j = 0; j < recog_data.n_alternatives; j++)
5856 if (op_alt[j].anything_ok
5857 || (op_alt[j].matches != -1
5858 && operands_match_p
5859 (recog_data.operand[i],
5860 recog_data.operand[op_alt[j].matches]))
5861 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5863 win = true;
5864 break;
5868 if (!win)
5869 return false;
5873 return true;
5876 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5878 static unsigned HOST_WIDE_INT
5879 ix86_asan_shadow_offset (void)
5881 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5882 : HOST_WIDE_INT_C (0x7fff8000))
5883 : (HOST_WIDE_INT_1 << 29);
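/* libsanitizer maps an address to its shadow byte roughly as
   shadow = (addr >> 3) + offset, so the values above place the shadow
   region at 1<<44 for LP64 Mach-O, at 0x7fff8000 for other LP64 targets
   and at 1<<29 for 32-bit targets.  */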
5886 /* Argument support functions. */
5888 /* Return true when register may be used to pass function parameters. */
5889 bool
5890 ix86_function_arg_regno_p (int regno)
5892 int i;
5893 const int *parm_regs;
5895 if (!TARGET_64BIT)
5897 if (TARGET_MACHO)
5898 return (regno < REGPARM_MAX
5899 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5900 else
5901 return (regno < REGPARM_MAX
5902 || (TARGET_MMX && MMX_REGNO_P (regno)
5903 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5904 || (TARGET_SSE && SSE_REGNO_P (regno)
5905 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5908 if (TARGET_SSE && SSE_REGNO_P (regno)
5909 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5910 return true;
5912 /* TODO: The function should depend on current function ABI but
5913 builtins.c would need updating then. Therefore we use the
5914 default ABI. */
5916 /* RAX is used as hidden argument to va_arg functions. */
5917 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5918 return true;
5920 if (ix86_abi == MS_ABI)
5921 parm_regs = x86_64_ms_abi_int_parameter_registers;
5922 else
5923 parm_regs = x86_64_int_parameter_registers;
5924 for (i = 0; i < (ix86_abi == MS_ABI
5925 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5926 if (regno == parm_regs[i])
5927 return true;
5928 return false;
5931 /* Return true if we do not know how to pass TYPE solely in registers. */
5933 static bool
5934 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5936 if (must_pass_in_stack_var_size_or_pad (mode, type))
5937 return true;
5939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5940 The layout_type routine is crafty and tries to trick us into passing
5941 currently unsupported vector types on the stack by using TImode. */
5942 return (!TARGET_64BIT && mode == TImode
5943 && type && TREE_CODE (type) != VECTOR_TYPE);
5946 /* Return the size, in bytes, of the area reserved for arguments passed
5947 in registers for the function represented by FNDECL, depending on the
5948 ABI format used. */
5949 int
5950 ix86_reg_parm_stack_space (const_tree fndecl)
5952 enum calling_abi call_abi = SYSV_ABI;
5953 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5954 call_abi = ix86_function_abi (fndecl);
5955 else
5956 call_abi = ix86_function_type_abi (fndecl);
5957 if (TARGET_64BIT && call_abi == MS_ABI)
5958 return 32;
5959 return 0;
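/* The 32 bytes returned for the 64-bit MS ABI correspond to the "home"
   (shadow) area the caller must reserve for the four register argument
   slots; the SysV ABI reserves no such area, hence 0.  */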
5962 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5963 call abi used. */
5964 enum calling_abi
5965 ix86_function_type_abi (const_tree fntype)
5967 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5969 enum calling_abi abi = ix86_abi;
5970 if (abi == SYSV_ABI)
5972 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5973 abi = MS_ABI;
5975 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5976 abi = SYSV_ABI;
5977 return abi;
5979 return ix86_abi;
5982 /* We add this as a workaround in order to use libc_has_function
5983 hook in i386.md. */
5984 bool
5985 ix86_libc_has_function (enum function_class fn_class)
5987 return targetm.libc_has_function (fn_class);
5990 static bool
5991 ix86_function_ms_hook_prologue (const_tree fn)
5993 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5995 if (decl_function_context (fn) != NULL_TREE)
5996 error_at (DECL_SOURCE_LOCATION (fn),
5997 "ms_hook_prologue is not compatible with nested function");
5998 else
5999 return true;
6001 return false;
6004 static enum calling_abi
6005 ix86_function_abi (const_tree fndecl)
6007 if (! fndecl)
6008 return ix86_abi;
6009 return ix86_function_type_abi (TREE_TYPE (fndecl));
6012 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6013 call abi used. */
6014 enum calling_abi
6015 ix86_cfun_abi (void)
6017 if (! cfun)
6018 return ix86_abi;
6019 return cfun->machine->call_abi;
6022 /* Write the extra assembler code needed to declare a function properly. */
6024 void
6025 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6026 tree decl)
6028 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6030 if (is_ms_hook)
6032 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6033 unsigned int filler_cc = 0xcccccccc;
6035 for (i = 0; i < filler_count; i += 4)
6036 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6039 #ifdef SUBTARGET_ASM_UNWIND_INIT
6040 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6041 #endif
6043 ASM_OUTPUT_LABEL (asm_out_file, fname);
6045 /* Output magic byte marker, if hot-patch attribute is set. */
6046 if (is_ms_hook)
6048 if (TARGET_64BIT)
6050 /* leaq [%rsp + 0], %rsp */
6051 asm_fprintf (asm_out_file, ASM_BYTE
6052 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6054 else
6056 /* movl.s %edi, %edi
6057 push %ebp
6058 movl.s %esp, %ebp */
6059 asm_fprintf (asm_out_file, ASM_BYTE
6060 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6065 /* regclass.c */
6066 extern void init_regs (void);
6068 /* Implementation of the call ABI switching target hook. The call
6069 register sets specific to FNDECL are selected. See also
6070 ix86_conditional_register_usage for more details. */
6071 void
6072 ix86_call_abi_override (const_tree fndecl)
6074 if (fndecl == NULL_TREE)
6075 cfun->machine->call_abi = ix86_abi;
6076 else
6077 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6080 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6081 expensive re-initialization of init_regs each time we switch function context,
6082 since this is needed only during RTL expansion. */
6083 static void
6084 ix86_maybe_switch_abi (void)
6086 if (TARGET_64BIT &&
6087 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6088 reinit_regs ();
6091 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6092 for a call to a function whose data type is FNTYPE.
6093 For a library call, FNTYPE is 0. */
6095 void
6096 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6097 tree fntype, /* tree ptr for function decl */
6098 rtx libname, /* SYMBOL_REF of library name or 0 */
6099 tree fndecl,
6100 int caller)
6102 struct cgraph_local_info *i;
6104 memset (cum, 0, sizeof (*cum));
6106 if (fndecl)
6108 i = cgraph_local_info (fndecl);
6109 cum->call_abi = ix86_function_abi (fndecl);
6111 else
6113 i = NULL;
6114 cum->call_abi = ix86_function_type_abi (fntype);
6117 cum->caller = caller;
6119 /* Set up the number of registers to use for passing arguments. */
6120 cum->nregs = ix86_regparm;
6121 if (TARGET_64BIT)
6123 cum->nregs = (cum->call_abi == SYSV_ABI
6124 ? X86_64_REGPARM_MAX
6125 : X86_64_MS_REGPARM_MAX);
6127 if (TARGET_SSE)
6129 cum->sse_nregs = SSE_REGPARM_MAX;
6130 if (TARGET_64BIT)
6132 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6133 ? X86_64_SSE_REGPARM_MAX
6134 : X86_64_MS_SSE_REGPARM_MAX);
6137 if (TARGET_MMX)
6138 cum->mmx_nregs = MMX_REGPARM_MAX;
6139 cum->warn_avx512f = true;
6140 cum->warn_avx = true;
6141 cum->warn_sse = true;
6142 cum->warn_mmx = true;
6144 /* Because the type might mismatch between caller and callee, we need to
6145 use the actual type of the function for local calls.
6146 FIXME: cgraph_analyze can be told to actually record if a function uses
6147 va_start, so for local functions maybe_vaarg can be made more aggressive,
6148 helping K&R code.
6149 FIXME: once the type system is fixed, we won't need this code anymore. */
6150 if (i && i->local && i->can_change_signature)
6151 fntype = TREE_TYPE (fndecl);
6152 cum->maybe_vaarg = (fntype
6153 ? (!prototype_p (fntype) || stdarg_p (fntype))
6154 : !libname);
6156 if (!TARGET_64BIT)
6158 /* If there are variable arguments, then we won't pass anything
6159 in registers in 32-bit mode. */
6160 if (stdarg_p (fntype))
6162 cum->nregs = 0;
6163 cum->sse_nregs = 0;
6164 cum->mmx_nregs = 0;
6165 cum->warn_avx512f = false;
6166 cum->warn_avx = false;
6167 cum->warn_sse = false;
6168 cum->warn_mmx = false;
6169 return;
6172 /* Use ecx and edx registers if function has fastcall attribute,
6173 else look for regparm information. */
6174 if (fntype)
6176 unsigned int ccvt = ix86_get_callcvt (fntype);
6177 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6179 cum->nregs = 1;
6180 cum->fastcall = 1; /* Same first register as in fastcall. */
6182 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6184 cum->nregs = 2;
6185 cum->fastcall = 1;
6187 else
6188 cum->nregs = ix86_function_regparm (fntype, fndecl);
6191 /* Set up the number of SSE registers used for passing SFmode
6192 and DFmode arguments. Warn for mismatching ABI. */
6193 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6197 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6198 But in the case of vector types, it is some vector mode.
6200 When we have only some of our vector isa extensions enabled, then there
6201 are some modes for which vector_mode_supported_p is false. For these
6202 modes, the generic vector support in gcc will choose some non-vector mode
6203 in order to implement the type. By computing the natural mode, we'll
6204 select the proper ABI location for the operand and not depend on whatever
6205 the middle-end decides to do with these vector types.
6207 The middle-end can't deal with vector types larger than 16 bytes. In this
6208 case, we return the original mode and warn ABI change if CUM isn't
6209 NULL.
6211 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6212 available for function return value. */
6214 static enum machine_mode
6215 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6216 bool in_return)
6218 enum machine_mode mode = TYPE_MODE (type);
6220 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6222 HOST_WIDE_INT size = int_size_in_bytes (type);
6223 if ((size == 8 || size == 16 || size == 32 || size == 64)
6224 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6225 && TYPE_VECTOR_SUBPARTS (type) > 1)
6227 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6229 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6230 mode = MIN_MODE_VECTOR_FLOAT;
6231 else
6232 mode = MIN_MODE_VECTOR_INT;
6234 /* Get the mode which has this inner mode and number of units. */
6235 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6236 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6237 && GET_MODE_INNER (mode) == innermode)
6239 if (size == 64 && !TARGET_AVX512F)
6241 static bool warnedavx512f;
6242 static bool warnedavx512f_ret;
6244 if (cum && cum->warn_avx512f && !warnedavx512f)
6246 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6247 "without AVX512F enabled changes the ABI"))
6248 warnedavx512f = true;
6250 else if (in_return && !warnedavx512f_ret)
6252 if (warning (OPT_Wpsabi, "AVX512F vector return "
6253 "without AVX512F enabled changes the ABI"))
6254 warnedavx512f_ret = true;
6257 return TYPE_MODE (type);
6259 else if (size == 32 && !TARGET_AVX)
6261 static bool warnedavx;
6262 static bool warnedavx_ret;
6264 if (cum && cum->warn_avx && !warnedavx)
6266 if (warning (OPT_Wpsabi, "AVX vector argument "
6267 "without AVX enabled changes the ABI"))
6268 warnedavx = true;
6270 else if (in_return && !warnedavx_ret)
6272 if (warning (OPT_Wpsabi, "AVX vector return "
6273 "without AVX enabled changes the ABI"))
6274 warnedavx_ret = true;
6277 return TYPE_MODE (type);
6279 else if (((size == 8 && TARGET_64BIT) || size == 16)
6280 && !TARGET_SSE)
6282 static bool warnedsse;
6283 static bool warnedsse_ret;
6285 if (cum && cum->warn_sse && !warnedsse)
6287 if (warning (OPT_Wpsabi, "SSE vector argument "
6288 "without SSE enabled changes the ABI"))
6289 warnedsse = true;
6291 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6293 if (warning (OPT_Wpsabi, "SSE vector return "
6294 "without SSE enabled changes the ABI"))
6295 warnedsse_ret = true;
6298 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6300 static bool warnedmmx;
6301 static bool warnedmmx_ret;
6303 if (cum && cum->warn_mmx && !warnedmmx)
6305 if (warning (OPT_Wpsabi, "MMX vector argument "
6306 "without MMX enabled changes the ABI"))
6307 warnedmmx = true;
6309 else if (in_return && !warnedmmx_ret)
6311 if (warning (OPT_Wpsabi, "MMX vector return "
6312 "without MMX enabled changes the ABI"))
6313 warnedmmx_ret = true;
6316 return mode;
6319 gcc_unreachable ();
6323 return mode;
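/* As an example, "typedef int v8si __attribute__((vector_size (32)));" gets
   natural mode V8SImode here when AVX is enabled; without AVX the
   OPT_Wpsabi warning above is issued and the type's generic TYPE_MODE is
   returned instead.  */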
6326 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6327 this may not agree with the mode that the type system has chosen for the
6328 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6329 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6331 static rtx
6332 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6333 unsigned int regno)
6335 rtx tmp;
6337 if (orig_mode != BLKmode)
6338 tmp = gen_rtx_REG (orig_mode, regno);
6339 else
6341 tmp = gen_rtx_REG (mode, regno);
6342 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6343 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6346 return tmp;
6349 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6350 of this code is to classify each 8bytes of incoming argument by the register
6351 class and assign registers accordingly. */
6353 /* Return the union class of CLASS1 and CLASS2.
6354 See the x86-64 PS ABI for details. */
6356 static enum x86_64_reg_class
6357 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6359 /* Rule #1: If both classes are equal, this is the resulting class. */
6360 if (class1 == class2)
6361 return class1;
6363 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6364 the other class. */
6365 if (class1 == X86_64_NO_CLASS)
6366 return class2;
6367 if (class2 == X86_64_NO_CLASS)
6368 return class1;
6370 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6371 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6372 return X86_64_MEMORY_CLASS;
6374 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6375 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6376 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6377 return X86_64_INTEGERSI_CLASS;
6378 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6379 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6380 return X86_64_INTEGER_CLASS;
6382 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6383 MEMORY is used. */
6384 if (class1 == X86_64_X87_CLASS
6385 || class1 == X86_64_X87UP_CLASS
6386 || class1 == X86_64_COMPLEX_X87_CLASS
6387 || class2 == X86_64_X87_CLASS
6388 || class2 == X86_64_X87UP_CLASS
6389 || class2 == X86_64_COMPLEX_X87_CLASS)
6390 return X86_64_MEMORY_CLASS;
6392 /* Rule #6: Otherwise class SSE is used. */
6393 return X86_64_SSE_CLASS;
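/* Worked example of these rules (SysV x86-64 ABI): for
   "struct s { double d; int i; };" the first eightbyte classifies as SSE and
   the second as INTEGER, so the struct is passed in one SSE register and one
   general-purpose register rather than in memory.  */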
6396 /* Classify the argument of type TYPE and mode MODE.
6397 CLASSES will be filled by the register class used to pass each word
6398 of the operand. The number of words is returned. In case the parameter
6399 should be passed in memory, 0 is returned. As a special case for zero
6400 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6402 BIT_OFFSET is used internally for handling records and specifies the
6403 offset in bits modulo 512 to avoid overflow cases.
6405 See the x86-64 PS ABI for details.
6408 static int
6409 classify_argument (enum machine_mode mode, const_tree type,
6410 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6412 HOST_WIDE_INT bytes =
6413 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6414 int words
6415 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6417 /* Variable sized entities are always passed/returned in memory. */
6418 if (bytes < 0)
6419 return 0;
6421 if (mode != VOIDmode
6422 && targetm.calls.must_pass_in_stack (mode, type))
6423 return 0;
6425 if (type && AGGREGATE_TYPE_P (type))
6427 int i;
6428 tree field;
6429 enum x86_64_reg_class subclasses[MAX_CLASSES];
6431 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6432 if (bytes > 64)
6433 return 0;
6435 for (i = 0; i < words; i++)
6436 classes[i] = X86_64_NO_CLASS;
6438 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6439 signal the memory class, so handle this as a special case. */
6440 if (!words)
6442 classes[0] = X86_64_NO_CLASS;
6443 return 1;
6446 /* Classify each field of record and merge classes. */
6447 switch (TREE_CODE (type))
6449 case RECORD_TYPE:
6450 /* And now merge the fields of structure. */
6451 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6453 if (TREE_CODE (field) == FIELD_DECL)
6455 int num;
6457 if (TREE_TYPE (field) == error_mark_node)
6458 continue;
6460 /* Bitfields are always classified as integer. Handle them
6461 early, since later code would consider them to be
6462 misaligned integers. */
6463 if (DECL_BIT_FIELD (field))
6465 for (i = (int_bit_position (field)
6466 + (bit_offset % 64)) / 8 / 8;
6467 i < ((int_bit_position (field) + (bit_offset % 64))
6468 + tree_to_shwi (DECL_SIZE (field))
6469 + 63) / 8 / 8; i++)
6470 classes[i] =
6471 merge_classes (X86_64_INTEGER_CLASS,
6472 classes[i]);
6474 else
6476 int pos;
6478 type = TREE_TYPE (field);
6480 /* Flexible array member is ignored. */
6481 if (TYPE_MODE (type) == BLKmode
6482 && TREE_CODE (type) == ARRAY_TYPE
6483 && TYPE_SIZE (type) == NULL_TREE
6484 && TYPE_DOMAIN (type) != NULL_TREE
6485 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6486 == NULL_TREE))
6488 static bool warned;
6490 if (!warned && warn_psabi)
6492 warned = true;
6493 inform (input_location,
6494 "the ABI of passing struct with"
6495 " a flexible array member has"
6496 " changed in GCC 4.4");
6498 continue;
6500 num = classify_argument (TYPE_MODE (type), type,
6501 subclasses,
6502 (int_bit_position (field)
6503 + bit_offset) % 512);
6504 if (!num)
6505 return 0;
6506 pos = (int_bit_position (field)
6507 + (bit_offset % 64)) / 8 / 8;
6508 for (i = 0; i < num && (i + pos) < words; i++)
6509 classes[i + pos] =
6510 merge_classes (subclasses[i], classes[i + pos]);
6514 break;
6516 case ARRAY_TYPE:
6517 /* Arrays are handled as small records. */
6519 int num;
6520 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6521 TREE_TYPE (type), subclasses, bit_offset);
6522 if (!num)
6523 return 0;
6525 /* The partial classes are now full classes. */
6526 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6527 subclasses[0] = X86_64_SSE_CLASS;
6528 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6529 && !((bit_offset % 64) == 0 && bytes == 4))
6530 subclasses[0] = X86_64_INTEGER_CLASS;
6532 for (i = 0; i < words; i++)
6533 classes[i] = subclasses[i % num];
6535 break;
6537 case UNION_TYPE:
6538 case QUAL_UNION_TYPE:
6539 /* Unions are similar to RECORD_TYPE but offset is always 0. */
6541 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6543 if (TREE_CODE (field) == FIELD_DECL)
6545 int num;
6547 if (TREE_TYPE (field) == error_mark_node)
6548 continue;
6550 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6551 TREE_TYPE (field), subclasses,
6552 bit_offset);
6553 if (!num)
6554 return 0;
6555 for (i = 0; i < num; i++)
6556 classes[i] = merge_classes (subclasses[i], classes[i]);
6559 break;
6561 default:
6562 gcc_unreachable ();
6565 if (words > 2)
6567 /* When size > 16 bytes, if the first eightbyte isn't
6568 X86_64_SSE_CLASS or any of the remaining ones isn't
6569 X86_64_SSEUP_CLASS, everything should be passed in
6570 memory. */
6571 if (classes[0] != X86_64_SSE_CLASS)
6572 return 0;
6574 for (i = 1; i < words; i++)
6575 if (classes[i] != X86_64_SSEUP_CLASS)
6576 return 0;
6579 /* Final merger cleanup. */
6580 for (i = 0; i < words; i++)
6582 /* If one class is MEMORY, everything should be passed in
6583 memory. */
6584 if (classes[i] == X86_64_MEMORY_CLASS)
6585 return 0;
6587 /* The X86_64_SSEUP_CLASS should always be preceded by
6588 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6589 if (classes[i] == X86_64_SSEUP_CLASS
6590 && classes[i - 1] != X86_64_SSE_CLASS
6591 && classes[i - 1] != X86_64_SSEUP_CLASS)
6593 /* The first one should never be X86_64_SSEUP_CLASS. */
6594 gcc_assert (i != 0);
6595 classes[i] = X86_64_SSE_CLASS;
6598 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6599 everything should be passed in memory. */
6600 if (classes[i] == X86_64_X87UP_CLASS
6601 && (classes[i - 1] != X86_64_X87_CLASS))
6603 static bool warned;
6605 /* The first one should never be X86_64_X87UP_CLASS. */
6606 gcc_assert (i != 0);
6607 if (!warned && warn_psabi)
6609 warned = true;
6610 inform (input_location,
6611 "the ABI of passing union with long double"
6612 " has changed in GCC 4.4");
6614 return 0;
6617 return words;
6620 /* Compute the alignment needed. We align all types to their natural boundaries,
6621 with the exception of XFmode, which is aligned to 64 bits. */
6622 if (mode != VOIDmode && mode != BLKmode)
6624 int mode_alignment = GET_MODE_BITSIZE (mode);
6626 if (mode == XFmode)
6627 mode_alignment = 128;
6628 else if (mode == XCmode)
6629 mode_alignment = 256;
6630 if (COMPLEX_MODE_P (mode))
6631 mode_alignment /= 2;
6632 /* Misaligned fields are always returned in memory. */
6633 if (bit_offset % mode_alignment)
6634 return 0;
6637 /* For V1xx modes, just use the base mode. */
6638 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6639 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6640 mode = GET_MODE_INNER (mode);
6642 /* Classification of atomic types. */
6643 switch (mode)
6645 case SDmode:
6646 case DDmode:
6647 classes[0] = X86_64_SSE_CLASS;
6648 return 1;
6649 case TDmode:
6650 classes[0] = X86_64_SSE_CLASS;
6651 classes[1] = X86_64_SSEUP_CLASS;
6652 return 2;
6653 case DImode:
6654 case SImode:
6655 case HImode:
6656 case QImode:
6657 case CSImode:
6658 case CHImode:
6659 case CQImode:
6661 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6663 /* Analyze last 128 bits only. */
6664 size = (size - 1) & 0x7f;
6666 if (size < 32)
6668 classes[0] = X86_64_INTEGERSI_CLASS;
6669 return 1;
6671 else if (size < 64)
6673 classes[0] = X86_64_INTEGER_CLASS;
6674 return 1;
6676 else if (size < 64+32)
6678 classes[0] = X86_64_INTEGER_CLASS;
6679 classes[1] = X86_64_INTEGERSI_CLASS;
6680 return 2;
6682 else if (size < 64+64)
6684 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6685 return 2;
6687 else
6688 gcc_unreachable ();
6690 case CDImode:
6691 case TImode:
6692 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6693 return 2;
6694 case COImode:
6695 case OImode:
6696 /* OImode shouldn't be used directly. */
6697 gcc_unreachable ();
6698 case CTImode:
6699 return 0;
6700 case SFmode:
6701 if (!(bit_offset % 64))
6702 classes[0] = X86_64_SSESF_CLASS;
6703 else
6704 classes[0] = X86_64_SSE_CLASS;
6705 return 1;
6706 case DFmode:
6707 classes[0] = X86_64_SSEDF_CLASS;
6708 return 1;
6709 case XFmode:
6710 classes[0] = X86_64_X87_CLASS;
6711 classes[1] = X86_64_X87UP_CLASS;
6712 return 2;
6713 case TFmode:
6714 classes[0] = X86_64_SSE_CLASS;
6715 classes[1] = X86_64_SSEUP_CLASS;
6716 return 2;
6717 case SCmode:
6718 classes[0] = X86_64_SSE_CLASS;
6719 if (!(bit_offset % 64))
6720 return 1;
6721 else
6723 static bool warned;
6725 if (!warned && warn_psabi)
6727 warned = true;
6728 inform (input_location,
6729 "the ABI of passing structure with complex float"
6730 " member has changed in GCC 4.4");
6732 classes[1] = X86_64_SSESF_CLASS;
6733 return 2;
6735 case DCmode:
6736 classes[0] = X86_64_SSEDF_CLASS;
6737 classes[1] = X86_64_SSEDF_CLASS;
6738 return 2;
6739 case XCmode:
6740 classes[0] = X86_64_COMPLEX_X87_CLASS;
6741 return 1;
6742 case TCmode:
6743 /* This mode is larger than 16 bytes. */
6744 return 0;
6745 case V8SFmode:
6746 case V8SImode:
6747 case V32QImode:
6748 case V16HImode:
6749 case V4DFmode:
6750 case V4DImode:
6751 classes[0] = X86_64_SSE_CLASS;
6752 classes[1] = X86_64_SSEUP_CLASS;
6753 classes[2] = X86_64_SSEUP_CLASS;
6754 classes[3] = X86_64_SSEUP_CLASS;
6755 return 4;
6756 case V8DFmode:
6757 case V16SFmode:
6758 case V8DImode:
6759 case V16SImode:
6760 case V32HImode:
6761 case V64QImode:
6762 classes[0] = X86_64_SSE_CLASS;
6763 classes[1] = X86_64_SSEUP_CLASS;
6764 classes[2] = X86_64_SSEUP_CLASS;
6765 classes[3] = X86_64_SSEUP_CLASS;
6766 classes[4] = X86_64_SSEUP_CLASS;
6767 classes[5] = X86_64_SSEUP_CLASS;
6768 classes[6] = X86_64_SSEUP_CLASS;
6769 classes[7] = X86_64_SSEUP_CLASS;
6770 return 8;
6771 case V4SFmode:
6772 case V4SImode:
6773 case V16QImode:
6774 case V8HImode:
6775 case V2DFmode:
6776 case V2DImode:
6777 classes[0] = X86_64_SSE_CLASS;
6778 classes[1] = X86_64_SSEUP_CLASS;
6779 return 2;
6780 case V1TImode:
6781 case V1DImode:
6782 case V2SFmode:
6783 case V2SImode:
6784 case V4HImode:
6785 case V8QImode:
6786 classes[0] = X86_64_SSE_CLASS;
6787 return 1;
6788 case BLKmode:
6789 case VOIDmode:
6790 return 0;
6791 default:
6792 gcc_assert (VECTOR_MODE_P (mode));
6794 if (bytes > 16)
6795 return 0;
6797 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6799 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6800 classes[0] = X86_64_INTEGERSI_CLASS;
6801 else
6802 classes[0] = X86_64_INTEGER_CLASS;
6803 classes[1] = X86_64_INTEGER_CLASS;
6804 return 1 + (bytes > 8);
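/* Worked example (editor's sketch, using a hypothetical type): for
   struct { double d; long l; } on 64-bit SysV, bytes == 16 and words == 2.
   The double yields X86_64_SSEDF_CLASS for eightbyte 0 and the long yields
   X86_64_INTEGER_CLASS for eightbyte 1, so classify_argument returns 2 with
   classes[] == { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }: the first half
   of the struct is passed in an SSE register, the second half in a GPR.  */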
6808 /* Examine the argument and set the number of registers required in each
6809 class. Return true iff the parameter should be passed in memory. */
6811 static bool
6812 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6813 int *int_nregs, int *sse_nregs)
6815 enum x86_64_reg_class regclass[MAX_CLASSES];
6816 int n = classify_argument (mode, type, regclass, 0);
6818 *int_nregs = 0;
6819 *sse_nregs = 0;
6821 if (!n)
6822 return true;
6823 for (n--; n >= 0; n--)
6824 switch (regclass[n])
6826 case X86_64_INTEGER_CLASS:
6827 case X86_64_INTEGERSI_CLASS:
6828 (*int_nregs)++;
6829 break;
6830 case X86_64_SSE_CLASS:
6831 case X86_64_SSESF_CLASS:
6832 case X86_64_SSEDF_CLASS:
6833 (*sse_nregs)++;
6834 break;
6835 case X86_64_NO_CLASS:
6836 case X86_64_SSEUP_CLASS:
6837 break;
6838 case X86_64_X87_CLASS:
6839 case X86_64_X87UP_CLASS:
6840 case X86_64_COMPLEX_X87_CLASS:
6841 if (!in_return)
6842 return true;
6843 break;
6844 case X86_64_MEMORY_CLASS:
6845 gcc_unreachable ();
6848 return false;
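/* Illustrative sketch (editor's addition, not from the original source): for
   the hypothetical struct { double d; long l; } above, examine_argument sets
   *sse_nregs to 1 and *int_nregs to 1 and returns false (register passing).
   For an XFmode long double the classes are X87/X87UP, so the function
   returns true for an argument (memory) but false when IN_RETURN is set,
   since long doubles are returned in %st(0).  */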
6851 /* Construct container for the argument used by GCC interface. See
6852 FUNCTION_ARG for the detailed description. */
6854 static rtx
6855 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6856 const_tree type, int in_return, int nintregs, int nsseregs,
6857 const int *intreg, int sse_regno)
6859 /* The following variables hold the static issued_error state. */
6860 static bool issued_sse_arg_error;
6861 static bool issued_sse_ret_error;
6862 static bool issued_x87_ret_error;
6864 enum machine_mode tmpmode;
6865 int bytes =
6866 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6867 enum x86_64_reg_class regclass[MAX_CLASSES];
6868 int n;
6869 int i;
6870 int nexps = 0;
6871 int needed_sseregs, needed_intregs;
6872 rtx exp[MAX_CLASSES];
6873 rtx ret;
6875 n = classify_argument (mode, type, regclass, 0);
6876 if (!n)
6877 return NULL;
6878 if (examine_argument (mode, type, in_return, &needed_intregs,
6879 &needed_sseregs))
6880 return NULL;
6881 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6882 return NULL;
6884 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6885 some less clueful developer tries to use floating-point anyway. */
6886 if (needed_sseregs && !TARGET_SSE)
6888 if (in_return)
6890 if (!issued_sse_ret_error)
6892 error ("SSE register return with SSE disabled");
6893 issued_sse_ret_error = true;
6896 else if (!issued_sse_arg_error)
6898 error ("SSE register argument with SSE disabled");
6899 issued_sse_arg_error = true;
6901 return NULL;
6904 /* Likewise, error if the ABI requires us to return values in the
6905 x87 registers and the user specified -mno-80387. */
6906 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6907 for (i = 0; i < n; i++)
6908 if (regclass[i] == X86_64_X87_CLASS
6909 || regclass[i] == X86_64_X87UP_CLASS
6910 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6912 if (!issued_x87_ret_error)
6914 error ("x87 register return with x87 disabled");
6915 issued_x87_ret_error = true;
6917 return NULL;
6920 /* First construct the simple cases. Avoid SCmode, since we want to use a
6921 single register to pass this type. */
6922 if (n == 1 && mode != SCmode)
6923 switch (regclass[0])
6925 case X86_64_INTEGER_CLASS:
6926 case X86_64_INTEGERSI_CLASS:
6927 return gen_rtx_REG (mode, intreg[0]);
6928 case X86_64_SSE_CLASS:
6929 case X86_64_SSESF_CLASS:
6930 case X86_64_SSEDF_CLASS:
6931 if (mode != BLKmode)
6932 return gen_reg_or_parallel (mode, orig_mode,
6933 SSE_REGNO (sse_regno));
6934 break;
6935 case X86_64_X87_CLASS:
6936 case X86_64_COMPLEX_X87_CLASS:
6937 return gen_rtx_REG (mode, FIRST_STACK_REG);
6938 case X86_64_NO_CLASS:
6939 /* Zero sized array, struct or class. */
6940 return NULL;
6941 default:
6942 gcc_unreachable ();
6944 if (n == 2
6945 && regclass[0] == X86_64_SSE_CLASS
6946 && regclass[1] == X86_64_SSEUP_CLASS
6947 && mode != BLKmode)
6948 return gen_reg_or_parallel (mode, orig_mode,
6949 SSE_REGNO (sse_regno));
6950 if (n == 4
6951 && regclass[0] == X86_64_SSE_CLASS
6952 && regclass[1] == X86_64_SSEUP_CLASS
6953 && regclass[2] == X86_64_SSEUP_CLASS
6954 && regclass[3] == X86_64_SSEUP_CLASS
6955 && mode != BLKmode)
6956 return gen_reg_or_parallel (mode, orig_mode,
6957 SSE_REGNO (sse_regno));
6958 if (n == 8
6959 && regclass[0] == X86_64_SSE_CLASS
6960 && regclass[1] == X86_64_SSEUP_CLASS
6961 && regclass[2] == X86_64_SSEUP_CLASS
6962 && regclass[3] == X86_64_SSEUP_CLASS
6963 && regclass[4] == X86_64_SSEUP_CLASS
6964 && regclass[5] == X86_64_SSEUP_CLASS
6965 && regclass[6] == X86_64_SSEUP_CLASS
6966 && regclass[7] == X86_64_SSEUP_CLASS
6967 && mode != BLKmode)
6968 return gen_reg_or_parallel (mode, orig_mode,
6969 SSE_REGNO (sse_regno));
6970 if (n == 2
6971 && regclass[0] == X86_64_X87_CLASS
6972 && regclass[1] == X86_64_X87UP_CLASS)
6973 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6975 if (n == 2
6976 && regclass[0] == X86_64_INTEGER_CLASS
6977 && regclass[1] == X86_64_INTEGER_CLASS
6978 && (mode == CDImode || mode == TImode)
6979 && intreg[0] + 1 == intreg[1])
6980 return gen_rtx_REG (mode, intreg[0]);
6982 /* Otherwise figure out the entries of the PARALLEL. */
6983 for (i = 0; i < n; i++)
6985 int pos;
6987 switch (regclass[i])
6989 case X86_64_NO_CLASS:
6990 break;
6991 case X86_64_INTEGER_CLASS:
6992 case X86_64_INTEGERSI_CLASS:
6993 /* Merge TImodes on aligned occasions here too. */
6994 if (i * 8 + 8 > bytes)
6995 tmpmode
6996 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6997 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6998 tmpmode = SImode;
6999 else
7000 tmpmode = DImode;
7001 /* We've requested 24 bytes, which we
7002 don't have a mode for. Use DImode. */
7003 if (tmpmode == BLKmode)
7004 tmpmode = DImode;
7005 exp [nexps++]
7006 = gen_rtx_EXPR_LIST (VOIDmode,
7007 gen_rtx_REG (tmpmode, *intreg),
7008 GEN_INT (i*8));
7009 intreg++;
7010 break;
7011 case X86_64_SSESF_CLASS:
7012 exp [nexps++]
7013 = gen_rtx_EXPR_LIST (VOIDmode,
7014 gen_rtx_REG (SFmode,
7015 SSE_REGNO (sse_regno)),
7016 GEN_INT (i*8));
7017 sse_regno++;
7018 break;
7019 case X86_64_SSEDF_CLASS:
7020 exp [nexps++]
7021 = gen_rtx_EXPR_LIST (VOIDmode,
7022 gen_rtx_REG (DFmode,
7023 SSE_REGNO (sse_regno)),
7024 GEN_INT (i*8));
7025 sse_regno++;
7026 break;
7027 case X86_64_SSE_CLASS:
7028 pos = i;
7029 switch (n)
7031 case 1:
7032 tmpmode = DImode;
7033 break;
7034 case 2:
7035 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7037 tmpmode = TImode;
7038 i++;
7040 else
7041 tmpmode = DImode;
7042 break;
7043 case 4:
7044 gcc_assert (i == 0
7045 && regclass[1] == X86_64_SSEUP_CLASS
7046 && regclass[2] == X86_64_SSEUP_CLASS
7047 && regclass[3] == X86_64_SSEUP_CLASS);
7048 tmpmode = OImode;
7049 i += 3;
7050 break;
7051 case 8:
7052 gcc_assert (i == 0
7053 && regclass[1] == X86_64_SSEUP_CLASS
7054 && regclass[2] == X86_64_SSEUP_CLASS
7055 && regclass[3] == X86_64_SSEUP_CLASS
7056 && regclass[4] == X86_64_SSEUP_CLASS
7057 && regclass[5] == X86_64_SSEUP_CLASS
7058 && regclass[6] == X86_64_SSEUP_CLASS
7059 && regclass[7] == X86_64_SSEUP_CLASS);
7060 tmpmode = XImode;
7061 i += 7;
7062 break;
7063 default:
7064 gcc_unreachable ();
7066 exp [nexps++]
7067 = gen_rtx_EXPR_LIST (VOIDmode,
7068 gen_rtx_REG (tmpmode,
7069 SSE_REGNO (sse_regno)),
7070 GEN_INT (pos*8));
7071 sse_regno++;
7072 break;
7073 default:
7074 gcc_unreachable ();
7078 /* Empty aligned struct, union or class. */
7079 if (nexps == 0)
7080 return NULL;
7082 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7083 for (i = 0; i < nexps; i++)
7084 XVECEXP (ret, 0, i) = exp [i];
7085 return ret;
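/* Illustrative sketch (editor's addition, not from the original source): for
   the hypothetical struct { double d; long l; } passed as the first SysV
   argument, none of the simple cases above match, so the PARALLEL path
   produces roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. bytes 0-7 travel in %xmm0 and bytes 8-15 in %rdi.  */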
7088 /* Update the data in CUM to advance over an argument of mode MODE
7089 and data type TYPE. (TYPE is null for libcalls where that information
7090 may not be available.) */
7092 static void
7093 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7094 const_tree type, HOST_WIDE_INT bytes,
7095 HOST_WIDE_INT words)
7097 switch (mode)
7099 default:
7100 break;
7102 case BLKmode:
7103 if (bytes < 0)
7104 break;
7105 /* FALLTHRU */
7107 case DImode:
7108 case SImode:
7109 case HImode:
7110 case QImode:
7111 cum->words += words;
7112 cum->nregs -= words;
7113 cum->regno += words;
7115 if (cum->nregs <= 0)
7117 cum->nregs = 0;
7118 cum->regno = 0;
7120 break;
7122 case OImode:
7123 /* OImode shouldn't be used directly. */
7124 gcc_unreachable ();
7126 case DFmode:
7127 if (cum->float_in_sse < 2)
7128 break;
7129 case SFmode:
7130 if (cum->float_in_sse < 1)
7131 break;
7132 /* FALLTHRU */
7134 case V8SFmode:
7135 case V8SImode:
7136 case V64QImode:
7137 case V32HImode:
7138 case V16SImode:
7139 case V8DImode:
7140 case V16SFmode:
7141 case V8DFmode:
7142 case V32QImode:
7143 case V16HImode:
7144 case V4DFmode:
7145 case V4DImode:
7146 case TImode:
7147 case V16QImode:
7148 case V8HImode:
7149 case V4SImode:
7150 case V2DImode:
7151 case V4SFmode:
7152 case V2DFmode:
7153 if (!type || !AGGREGATE_TYPE_P (type))
7155 cum->sse_words += words;
7156 cum->sse_nregs -= 1;
7157 cum->sse_regno += 1;
7158 if (cum->sse_nregs <= 0)
7160 cum->sse_nregs = 0;
7161 cum->sse_regno = 0;
7164 break;
7166 case V8QImode:
7167 case V4HImode:
7168 case V2SImode:
7169 case V2SFmode:
7170 case V1TImode:
7171 case V1DImode:
7172 if (!type || !AGGREGATE_TYPE_P (type))
7174 cum->mmx_words += words;
7175 cum->mmx_nregs -= 1;
7176 cum->mmx_regno += 1;
7177 if (cum->mmx_nregs <= 0)
7179 cum->mmx_nregs = 0;
7180 cum->mmx_regno = 0;
7183 break;
7187 static void
7188 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7189 const_tree type, HOST_WIDE_INT words, bool named)
7191 int int_nregs, sse_nregs;
7193 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7194 if (!named && (VALID_AVX512F_REG_MODE (mode)
7195 || VALID_AVX256_REG_MODE (mode)))
7196 return;
7198 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7199 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7201 cum->nregs -= int_nregs;
7202 cum->sse_nregs -= sse_nregs;
7203 cum->regno += int_nregs;
7204 cum->sse_regno += sse_nregs;
7206 else
7208 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7209 cum->words = (cum->words + align - 1) & ~(align - 1);
7210 cum->words += words;
7214 static void
7215 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7216 HOST_WIDE_INT words)
7218 /* Otherwise, this should be passed indirectly. */
7219 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7221 cum->words += words;
7222 if (cum->nregs > 0)
7224 cum->nregs -= 1;
7225 cum->regno += 1;
7229 /* Update the data in CUM to advance over an argument of mode MODE and
7230 data type TYPE. (TYPE is null for libcalls where that information
7231 may not be available.) */
7233 static void
7234 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7235 const_tree type, bool named)
7237 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7238 HOST_WIDE_INT bytes, words;
7240 if (mode == BLKmode)
7241 bytes = int_size_in_bytes (type);
7242 else
7243 bytes = GET_MODE_SIZE (mode);
7244 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7246 if (type)
7247 mode = type_natural_mode (type, NULL, false);
7249 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7250 function_arg_advance_ms_64 (cum, bytes, words);
7251 else if (TARGET_64BIT)
7252 function_arg_advance_64 (cum, mode, type, words, named);
7253 else
7254 function_arg_advance_32 (cum, mode, type, bytes, words);
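/* Illustrative sketch (editor's addition, not from the original source):
   advancing over the hypothetical struct { double d; long l; } on 64-bit
   SysV goes through function_arg_advance_64, which consumes one integer slot
   and one SSE slot (cum->nregs and cum->sse_nregs each drop by 1).  Once
   examine_argument reports a memory argument, or the remaining register
   counts are too small, only cum->words grows (after rounding to the
   argument boundary) and the argument is located on the stack.  */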
7257 /* Define where to put the arguments to a function.
7258 Value is zero to push the argument on the stack,
7259 or a hard register in which to store the argument.
7261 MODE is the argument's machine mode.
7262 TYPE is the data type of the argument (as a tree).
7263 This is null for libcalls where that information may
7264 not be available.
7265 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7266 the preceding args and about the function being called.
7267 NAMED is nonzero if this argument is a named parameter
7268 (otherwise it is an extra parameter matching an ellipsis). */
7270 static rtx
7271 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7272 enum machine_mode orig_mode, const_tree type,
7273 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7275 /* Avoid the AL settings for the Unix64 ABI. */
7276 if (mode == VOIDmode)
7277 return constm1_rtx;
7279 switch (mode)
7281 default:
7282 break;
7284 case BLKmode:
7285 if (bytes < 0)
7286 break;
7287 /* FALLTHRU */
7288 case DImode:
7289 case SImode:
7290 case HImode:
7291 case QImode:
7292 if (words <= cum->nregs)
7294 int regno = cum->regno;
7296 /* Fastcall allocates the first two DWORD (SImode) or
7297 smaller arguments to ECX and EDX if the argument isn't an
7298 aggregate type. */
7299 if (cum->fastcall)
7301 if (mode == BLKmode
7302 || mode == DImode
7303 || (type && AGGREGATE_TYPE_P (type)))
7304 break;
7306 /* ECX, not EAX, is the first allocated register. */
7307 if (regno == AX_REG)
7308 regno = CX_REG;
7310 return gen_rtx_REG (mode, regno);
7312 break;
7314 case DFmode:
7315 if (cum->float_in_sse < 2)
7316 break;
7317 case SFmode:
7318 if (cum->float_in_sse < 1)
7319 break;
7320 /* FALLTHRU */
7321 case TImode:
7322 /* In 32bit, we pass TImode in xmm registers. */
7323 case V16QImode:
7324 case V8HImode:
7325 case V4SImode:
7326 case V2DImode:
7327 case V4SFmode:
7328 case V2DFmode:
7329 if (!type || !AGGREGATE_TYPE_P (type))
7331 if (cum->sse_nregs)
7332 return gen_reg_or_parallel (mode, orig_mode,
7333 cum->sse_regno + FIRST_SSE_REG);
7335 break;
7337 case OImode:
7338 case XImode:
7339 /* OImode and XImode shouldn't be used directly. */
7340 gcc_unreachable ();
7342 case V64QImode:
7343 case V32HImode:
7344 case V16SImode:
7345 case V8DImode:
7346 case V16SFmode:
7347 case V8DFmode:
7348 case V8SFmode:
7349 case V8SImode:
7350 case V32QImode:
7351 case V16HImode:
7352 case V4DFmode:
7353 case V4DImode:
7354 if (!type || !AGGREGATE_TYPE_P (type))
7356 if (cum->sse_nregs)
7357 return gen_reg_or_parallel (mode, orig_mode,
7358 cum->sse_regno + FIRST_SSE_REG);
7360 break;
7362 case V8QImode:
7363 case V4HImode:
7364 case V2SImode:
7365 case V2SFmode:
7366 case V1TImode:
7367 case V1DImode:
7368 if (!type || !AGGREGATE_TYPE_P (type))
7370 if (cum->mmx_nregs)
7371 return gen_reg_or_parallel (mode, orig_mode,
7372 cum->mmx_regno + FIRST_MMX_REG);
7374 break;
7377 return NULL_RTX;
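/* Illustrative sketch (editor's addition, hypothetical declaration): for
   void __attribute__((fastcall)) f (int a, int b, int c); the 32-bit query
   above hands back ECX for A (the AX_REG -> CX_REG adjustment) and EDX for B,
   while C exhausts cum->nregs and falls through to NULL_RTX, i.e. the
   stack.  */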
7380 static rtx
7381 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7382 enum machine_mode orig_mode, const_tree type, bool named)
7384 /* Handle a hidden AL argument containing number of registers
7385 for varargs x86-64 functions. */
7386 if (mode == VOIDmode)
7387 return GEN_INT (cum->maybe_vaarg
7388 ? (cum->sse_nregs < 0
7389 ? X86_64_SSE_REGPARM_MAX
7390 : cum->sse_regno)
7391 : -1);
7393 switch (mode)
7395 default:
7396 break;
7398 case V8SFmode:
7399 case V8SImode:
7400 case V32QImode:
7401 case V16HImode:
7402 case V4DFmode:
7403 case V4DImode:
7404 case V16SFmode:
7405 case V16SImode:
7406 case V64QImode:
7407 case V32HImode:
7408 case V8DFmode:
7409 case V8DImode:
7410 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7411 if (!named)
7412 return NULL;
7413 break;
7416 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7417 cum->sse_nregs,
7418 &x86_64_int_parameter_registers [cum->regno],
7419 cum->sse_regno);
7422 static rtx
7423 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7424 enum machine_mode orig_mode, bool named,
7425 HOST_WIDE_INT bytes)
7427 unsigned int regno;
7429 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7430 We use the value -2 to specify that the current function call uses the MS ABI. */
7431 if (mode == VOIDmode)
7432 return GEN_INT (-2);
7434 /* If we've run out of registers, it goes on the stack. */
7435 if (cum->nregs == 0)
7436 return NULL_RTX;
7438 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7440 /* Only floating point modes are passed in anything but integer regs. */
7441 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7443 if (named)
7444 regno = cum->regno + FIRST_SSE_REG;
7445 else
7447 rtx t1, t2;
7449 /* Unnamed floating parameters are passed in both the
7450 SSE and integer registers. */
7451 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7452 t2 = gen_rtx_REG (mode, regno);
7453 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7454 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7455 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7458 /* Handle aggregate types passed in a register. */
7459 if (orig_mode == BLKmode)
7461 if (bytes > 0 && bytes <= 8)
7462 mode = (bytes > 4 ? DImode : SImode);
7463 if (mode == BLKmode)
7464 mode = DImode;
7467 return gen_reg_or_parallel (mode, orig_mode, regno);
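/* Illustrative sketch (editor's addition, not from the original source):
   under the MS ABI the fourth argument slot maps to r9 (cum->regno == 3).
   A named double in that slot comes back as xmm3; an unnamed double in a
   varargs call is returned as a two-entry PARALLEL pairing xmm3 with r9, so
   the caller materializes the value in both registers.  */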
7470 /* Return where to put the arguments to a function.
7471 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7473 MODE is the argument's machine mode. TYPE is the data type of the
7474 argument. It is null for libcalls where that information may not be
7475 available. CUM gives information about the preceding args and about
7476 the function being called. NAMED is nonzero if this argument is a
7477 named parameter (otherwise it is an extra parameter matching an
7478 ellipsis). */
7480 static rtx
7481 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7482 const_tree type, bool named)
7484 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7485 enum machine_mode mode = omode;
7486 HOST_WIDE_INT bytes, words;
7487 rtx arg;
7489 if (mode == BLKmode)
7490 bytes = int_size_in_bytes (type);
7491 else
7492 bytes = GET_MODE_SIZE (mode);
7493 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7495 /* To simplify the code below, represent vector types with a vector mode
7496 even if MMX/SSE are not active. */
7497 if (type && TREE_CODE (type) == VECTOR_TYPE)
7498 mode = type_natural_mode (type, cum, false);
7500 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7501 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7502 else if (TARGET_64BIT)
7503 arg = function_arg_64 (cum, mode, omode, type, named);
7504 else
7505 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7507 return arg;
7510 /* A C expression that indicates when an argument must be passed by
7511 reference. If nonzero for an argument, a copy of that argument is
7512 made in memory and a pointer to the argument is passed instead of
7513 the argument itself. The pointer is passed in whatever way is
7514 appropriate for passing a pointer to that type. */
7516 static bool
7517 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7518 const_tree type, bool named ATTRIBUTE_UNUSED)
7520 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7522 /* See Windows x64 Software Convention. */
7523 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7525 int msize = (int) GET_MODE_SIZE (mode);
7526 if (type)
7528 /* Arrays are passed by reference. */
7529 if (TREE_CODE (type) == ARRAY_TYPE)
7530 return true;
7532 if (AGGREGATE_TYPE_P (type))
7534 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7535 are passed by reference. */
7536 msize = int_size_in_bytes (type);
7540 /* __m128 is passed by reference. */
7541 switch (msize) {
7542 case 1: case 2: case 4: case 8:
7543 break;
7544 default:
7545 return true;
7548 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7549 return 1;
7551 return 0;
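/* Illustrative sketch (editor's addition, not from the original source):
   under the MS ABI a hypothetical 12-byte struct, or a 16-byte __m128 value,
   fails the 1/2/4/8-byte size check above and is therefore passed by
   reference, while an 8-byte struct is passed by value in a register.  On
   64-bit SysV only variable-sized types (int_size_in_bytes == -1) take the
   by-reference path here.  */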
7554 /* Return true when TYPE should be 128bit aligned for 32bit argument
7555 passing ABI. XXX: This function is obsolete and is only used for
7556 checking psABI compatibility with previous versions of GCC. */
7558 static bool
7559 ix86_compat_aligned_value_p (const_tree type)
7561 enum machine_mode mode = TYPE_MODE (type);
7562 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7563 || mode == TDmode
7564 || mode == TFmode
7565 || mode == TCmode)
7566 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7567 return true;
7568 if (TYPE_ALIGN (type) < 128)
7569 return false;
7571 if (AGGREGATE_TYPE_P (type))
7573 /* Walk the aggregates recursively. */
7574 switch (TREE_CODE (type))
7576 case RECORD_TYPE:
7577 case UNION_TYPE:
7578 case QUAL_UNION_TYPE:
7580 tree field;
7582 /* Walk all the structure fields. */
7583 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7585 if (TREE_CODE (field) == FIELD_DECL
7586 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7587 return true;
7589 break;
7592 case ARRAY_TYPE:
7593 /* Just for use if some languages pass arrays by value. */
7594 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7595 return true;
7596 break;
7598 default:
7599 gcc_unreachable ();
7602 return false;
7605 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7606 XXX: This function is obsolete and is only used for checking psABI
7607 compatibility with previous versions of GCC. */
7609 static unsigned int
7610 ix86_compat_function_arg_boundary (enum machine_mode mode,
7611 const_tree type, unsigned int align)
7613 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7614 natural boundaries. */
7615 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7617 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7618 make an exception for SSE modes since these require 128bit
7619 alignment.
7621 The handling here differs from field_alignment. ICC aligns MMX
7622 arguments to 4 byte boundaries, while structure fields are aligned
7623 to 8 byte boundaries. */
7624 if (!type)
7626 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7627 align = PARM_BOUNDARY;
7629 else
7631 if (!ix86_compat_aligned_value_p (type))
7632 align = PARM_BOUNDARY;
7635 if (align > BIGGEST_ALIGNMENT)
7636 align = BIGGEST_ALIGNMENT;
7637 return align;
7640 /* Return true when TYPE should be 128bit aligned for 32bit argument
7641 passing ABI. */
7643 static bool
7644 ix86_contains_aligned_value_p (const_tree type)
7646 enum machine_mode mode = TYPE_MODE (type);
7648 if (mode == XFmode || mode == XCmode)
7649 return false;
7651 if (TYPE_ALIGN (type) < 128)
7652 return false;
7654 if (AGGREGATE_TYPE_P (type))
7656 /* Walk the aggregates recursively. */
7657 switch (TREE_CODE (type))
7659 case RECORD_TYPE:
7660 case UNION_TYPE:
7661 case QUAL_UNION_TYPE:
7663 tree field;
7665 /* Walk all the structure fields. */
7666 for (field = TYPE_FIELDS (type);
7667 field;
7668 field = DECL_CHAIN (field))
7670 if (TREE_CODE (field) == FIELD_DECL
7671 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7672 return true;
7674 break;
7677 case ARRAY_TYPE:
7678 /* Just for use if some languages pass arrays by value. */
7679 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7680 return true;
7681 break;
7683 default:
7684 gcc_unreachable ();
7687 else
7688 return TYPE_ALIGN (type) >= 128;
7690 return false;
7693 /* Gives the alignment boundary, in bits, of an argument with the
7694 specified mode and type. */
7696 static unsigned int
7697 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7699 unsigned int align;
7700 if (type)
7702 /* Since the main variant type is used for the call, convert the type
7703 to its main variant. */
7704 type = TYPE_MAIN_VARIANT (type);
7705 align = TYPE_ALIGN (type);
7707 else
7708 align = GET_MODE_ALIGNMENT (mode);
7709 if (align < PARM_BOUNDARY)
7710 align = PARM_BOUNDARY;
7711 else
7713 static bool warned;
7714 unsigned int saved_align = align;
7716 if (!TARGET_64BIT)
7718 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7719 if (!type)
7721 if (mode == XFmode || mode == XCmode)
7722 align = PARM_BOUNDARY;
7724 else if (!ix86_contains_aligned_value_p (type))
7725 align = PARM_BOUNDARY;
7727 if (align < 128)
7728 align = PARM_BOUNDARY;
7731 if (warn_psabi
7732 && !warned
7733 && align != ix86_compat_function_arg_boundary (mode, type,
7734 saved_align))
7736 warned = true;
7737 inform (input_location,
7738 "The ABI for passing parameters with %d-byte"
7739 " alignment has changed in GCC 4.6",
7740 align / BITS_PER_UNIT);
7744 return align;
7747 /* Return true if N is a possible register number of function value. */
7749 static bool
7750 ix86_function_value_regno_p (const unsigned int regno)
7752 switch (regno)
7754 case AX_REG:
7755 case DX_REG:
7756 return true;
7757 case DI_REG:
7758 case SI_REG:
7759 return TARGET_64BIT && ix86_abi != MS_ABI;
7761 /* Complex values are returned in %st(0)/%st(1) pair. */
7762 case ST0_REG:
7763 case ST1_REG:
7764 /* TODO: The function should depend on current function ABI but
7765 builtins.c would need updating then. Therefore we use the
7766 default ABI. */
7767 if (TARGET_64BIT && ix86_abi == MS_ABI)
7768 return false;
7769 return TARGET_FLOAT_RETURNS_IN_80387;
7771 /* Complex values are returned in %xmm0/%xmm1 pair. */
7772 case XMM0_REG:
7773 case XMM1_REG:
7774 return TARGET_SSE;
7776 case MM0_REG:
7777 if (TARGET_MACHO || TARGET_64BIT)
7778 return false;
7779 return TARGET_MMX;
7782 return false;
7785 /* Define how to find the value returned by a function.
7786 VALTYPE is the data type of the value (as a tree).
7787 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7788 otherwise, FUNC is 0. */
7790 static rtx
7791 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7792 const_tree fntype, const_tree fn)
7794 unsigned int regno;
7796 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7797 we normally prevent this case when mmx is not available. However
7798 some ABIs may require the result to be returned like DImode. */
7799 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7800 regno = FIRST_MMX_REG;
7802 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7803 we prevent this case when sse is not available. However some ABIs
7804 may require the result to be returned like integer TImode. */
7805 else if (mode == TImode
7806 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7807 regno = FIRST_SSE_REG;
7809 /* 32-byte vector modes in %ymm0. */
7810 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7811 regno = FIRST_SSE_REG;
7813 /* 64-byte vector modes in %zmm0. */
7814 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7815 regno = FIRST_SSE_REG;
7817 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7818 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7819 regno = FIRST_FLOAT_REG;
7820 else
7821 /* Most things go in %eax. */
7822 regno = AX_REG;
7824 /* Override FP return register with %xmm0 for local functions when
7825 SSE math is enabled or for functions with sseregparm attribute. */
7826 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7828 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7829 if ((sse_level >= 1 && mode == SFmode)
7830 || (sse_level == 2 && mode == DFmode))
7831 regno = FIRST_SSE_REG;
7834 /* OImode shouldn't be used directly. */
7835 gcc_assert (mode != OImode);
7837 return gen_rtx_REG (orig_mode, regno);
7840 static rtx
7841 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7842 const_tree valtype)
7844 rtx ret;
7846 /* Handle libcalls, which don't provide a type node. */
7847 if (valtype == NULL)
7849 unsigned int regno;
7851 switch (mode)
7853 case SFmode:
7854 case SCmode:
7855 case DFmode:
7856 case DCmode:
7857 case TFmode:
7858 case SDmode:
7859 case DDmode:
7860 case TDmode:
7861 regno = FIRST_SSE_REG;
7862 break;
7863 case XFmode:
7864 case XCmode:
7865 regno = FIRST_FLOAT_REG;
7866 break;
7867 case TCmode:
7868 return NULL;
7869 default:
7870 regno = AX_REG;
7873 return gen_rtx_REG (mode, regno);
7875 else if (POINTER_TYPE_P (valtype))
7877 /* Pointers are always returned in word_mode. */
7878 mode = word_mode;
7881 ret = construct_container (mode, orig_mode, valtype, 1,
7882 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7883 x86_64_int_return_registers, 0);
7885 /* For zero-sized structures, construct_container returns NULL, but we
7886 need to keep the rest of the compiler happy by returning a meaningful value. */
7887 if (!ret)
7888 ret = gen_rtx_REG (orig_mode, AX_REG);
7890 return ret;
7893 static rtx
7894 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7895 const_tree valtype)
7897 unsigned int regno = AX_REG;
7899 if (TARGET_SSE)
7901 switch (GET_MODE_SIZE (mode))
7903 case 16:
7904 if (valtype != NULL_TREE
7905 && !VECTOR_INTEGER_TYPE_P (valtype)
7907 && !INTEGRAL_TYPE_P (valtype)
7908 && !VECTOR_FLOAT_TYPE_P (valtype))
7909 break;
7910 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7911 && !COMPLEX_MODE_P (mode))
7912 regno = FIRST_SSE_REG;
7913 break;
7914 case 8:
7915 case 4:
7916 if (mode == SFmode || mode == DFmode)
7917 regno = FIRST_SSE_REG;
7918 break;
7919 default:
7920 break;
7923 return gen_rtx_REG (orig_mode, regno);
7926 static rtx
7927 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7928 enum machine_mode orig_mode, enum machine_mode mode)
7930 const_tree fn, fntype;
7932 fn = NULL_TREE;
7933 if (fntype_or_decl && DECL_P (fntype_or_decl))
7934 fn = fntype_or_decl;
7935 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7937 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7938 return function_value_ms_64 (orig_mode, mode, valtype);
7939 else if (TARGET_64BIT)
7940 return function_value_64 (orig_mode, mode, valtype);
7941 else
7942 return function_value_32 (orig_mode, mode, fntype, fn);
7945 static rtx
7946 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7947 bool outgoing ATTRIBUTE_UNUSED)
7949 enum machine_mode mode, orig_mode;
7951 orig_mode = TYPE_MODE (valtype);
7952 mode = type_natural_mode (valtype, NULL, true);
7953 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7956 /* Pointer function arguments and return values are promoted to
7957 word_mode. */
7959 static enum machine_mode
7960 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7961 int *punsignedp, const_tree fntype,
7962 int for_return)
7964 if (type != NULL_TREE && POINTER_TYPE_P (type))
7966 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7967 return word_mode;
7969 return default_promote_function_mode (type, mode, punsignedp, fntype,
7970 for_return);
7973 /* Return true if a structure, union or array with MODE containing FIELD
7974 should be accessed using BLKmode. */
7976 static bool
7977 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7979 /* Union with XFmode must be in BLKmode. */
7980 return (mode == XFmode
7981 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7982 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7985 rtx
7986 ix86_libcall_value (enum machine_mode mode)
7988 return ix86_function_value_1 (NULL, NULL, mode, mode);
7991 /* Return true iff type is returned in memory. */
7993 static bool
7994 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7996 #ifdef SUBTARGET_RETURN_IN_MEMORY
7997 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7998 #else
7999 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8000 HOST_WIDE_INT size;
8002 if (TARGET_64BIT)
8004 if (ix86_function_type_abi (fntype) == MS_ABI)
8006 size = int_size_in_bytes (type);
8008 /* __m128 is returned in xmm0. */
8009 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8010 || INTEGRAL_TYPE_P (type)
8011 || VECTOR_FLOAT_TYPE_P (type))
8012 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8013 && !COMPLEX_MODE_P (mode)
8014 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8015 return false;
8017 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8018 return size != 1 && size != 2 && size != 4 && size != 8;
8020 else
8022 int needed_intregs, needed_sseregs;
8024 return examine_argument (mode, type, 1,
8025 &needed_intregs, &needed_sseregs);
8028 else
8030 if (mode == BLKmode)
8031 return true;
8033 size = int_size_in_bytes (type);
8035 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8036 return false;
8038 if (VECTOR_MODE_P (mode) || mode == TImode)
8040 /* User-created vectors small enough to fit in EAX. */
8041 if (size < 8)
8042 return false;
8044 /* Unless the ABI prescribes otherwise,
8045 MMX/3dNow values are returned in MM0 if available. */
8047 if (size == 8)
8048 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8050 /* SSE values are returned in XMM0 if available. */
8051 if (size == 16)
8052 return !TARGET_SSE;
8054 /* AVX values are returned in YMM0 if available. */
8055 if (size == 32)
8056 return !TARGET_AVX;
8058 /* AVX512F values are returned in ZMM0 if available. */
8059 if (size == 64)
8060 return !TARGET_AVX512F;
8063 if (mode == XFmode)
8064 return false;
8066 if (size > 12)
8067 return true;
8069 /* OImode shouldn't be used directly. */
8070 gcc_assert (mode != OImode);
8072 return false;
8074 #endif
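/* Illustrative sketch (editor's addition, not from the original source): on
   32-bit, returning a hypothetical 16-byte vector such as __m128 yields
   !TARGET_SSE above, so it goes to memory unless SSE is enabled, in which
   case it comes back in %xmm0.  An XFmode long double is never forced to
   memory here.  On 64-bit SysV the decision is delegated to examine_argument
   with in_return == 1.  */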
8078 /* Create the va_list data type. */
8080 /* Returns the calling-convention-specific va_list data type.
8081 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8083 static tree
8084 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8086 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8088 /* For i386 we use plain pointer to argument area. */
8089 if (!TARGET_64BIT || abi == MS_ABI)
8090 return build_pointer_type (char_type_node);
8092 record = lang_hooks.types.make_type (RECORD_TYPE);
8093 type_decl = build_decl (BUILTINS_LOCATION,
8094 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8096 f_gpr = build_decl (BUILTINS_LOCATION,
8097 FIELD_DECL, get_identifier ("gp_offset"),
8098 unsigned_type_node);
8099 f_fpr = build_decl (BUILTINS_LOCATION,
8100 FIELD_DECL, get_identifier ("fp_offset"),
8101 unsigned_type_node);
8102 f_ovf = build_decl (BUILTINS_LOCATION,
8103 FIELD_DECL, get_identifier ("overflow_arg_area"),
8104 ptr_type_node);
8105 f_sav = build_decl (BUILTINS_LOCATION,
8106 FIELD_DECL, get_identifier ("reg_save_area"),
8107 ptr_type_node);
8109 va_list_gpr_counter_field = f_gpr;
8110 va_list_fpr_counter_field = f_fpr;
8112 DECL_FIELD_CONTEXT (f_gpr) = record;
8113 DECL_FIELD_CONTEXT (f_fpr) = record;
8114 DECL_FIELD_CONTEXT (f_ovf) = record;
8115 DECL_FIELD_CONTEXT (f_sav) = record;
8117 TYPE_STUB_DECL (record) = type_decl;
8118 TYPE_NAME (record) = type_decl;
8119 TYPE_FIELDS (record) = f_gpr;
8120 DECL_CHAIN (f_gpr) = f_fpr;
8121 DECL_CHAIN (f_fpr) = f_ovf;
8122 DECL_CHAIN (f_ovf) = f_sav;
8124 layout_type (record);
8126 /* The correct type is an array type of one element. */
8127 return build_array_type (record, build_index_type (size_zero_node));
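/* For reference, the record built above corresponds roughly to the following
   C declaration from the SysV x86-64 psABI (an editor's sketch, not code used
   by GCC itself):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];   // array type of one element

   gp_offset and fp_offset index into reg_save_area; overflow_arg_area points
   at the stack-passed arguments.  */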
8130 /* Setup the builtin va_list data type and for 64-bit the additional
8131 calling convention specific va_list data types. */
8133 static tree
8134 ix86_build_builtin_va_list (void)
8136 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8138 /* Initialize abi specific va_list builtin types. */
8139 if (TARGET_64BIT)
8141 tree t;
8142 if (ix86_abi == MS_ABI)
8144 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8145 if (TREE_CODE (t) != RECORD_TYPE)
8146 t = build_variant_type_copy (t);
8147 sysv_va_list_type_node = t;
8149 else
8151 t = ret;
8152 if (TREE_CODE (t) != RECORD_TYPE)
8153 t = build_variant_type_copy (t);
8154 sysv_va_list_type_node = t;
8156 if (ix86_abi != MS_ABI)
8158 t = ix86_build_builtin_va_list_abi (MS_ABI);
8159 if (TREE_CODE (t) != RECORD_TYPE)
8160 t = build_variant_type_copy (t);
8161 ms_va_list_type_node = t;
8163 else
8165 t = ret;
8166 if (TREE_CODE (t) != RECORD_TYPE)
8167 t = build_variant_type_copy (t);
8168 ms_va_list_type_node = t;
8172 return ret;
8175 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8177 static void
8178 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8180 rtx save_area, mem;
8181 alias_set_type set;
8182 int i, max;
8184 /* GPR size of varargs save area. */
8185 if (cfun->va_list_gpr_size)
8186 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8187 else
8188 ix86_varargs_gpr_size = 0;
8190 /* FPR size of varargs save area. We don't need it if we don't pass
8191 anything in SSE registers. */
8192 if (TARGET_SSE && cfun->va_list_fpr_size)
8193 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8194 else
8195 ix86_varargs_fpr_size = 0;
8197 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8198 return;
8200 save_area = frame_pointer_rtx;
8201 set = get_varargs_alias_set ();
8203 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8204 if (max > X86_64_REGPARM_MAX)
8205 max = X86_64_REGPARM_MAX;
8207 for (i = cum->regno; i < max; i++)
8209 mem = gen_rtx_MEM (word_mode,
8210 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8211 MEM_NOTRAP_P (mem) = 1;
8212 set_mem_alias_set (mem, set);
8213 emit_move_insn (mem,
8214 gen_rtx_REG (word_mode,
8215 x86_64_int_parameter_registers[i]));
8218 if (ix86_varargs_fpr_size)
8220 enum machine_mode smode;
8221 rtx label, test;
8223 /* Now emit code to save SSE registers. The AX parameter contains number
8224 of SSE parameter registers used to call this function, though all we
8225 actually check here is the zero/non-zero status. */
8227 label = gen_label_rtx ();
8228 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8229 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8230 label));
8232 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8233 we used movdqa (i.e. TImode) instead? Perhaps even better would
8234 be if we could determine the real mode of the data, via a hook
8235 into pass_stdarg. Ignore all that for now. */
8236 smode = V4SFmode;
8237 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8238 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8240 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8241 if (max > X86_64_SSE_REGPARM_MAX)
8242 max = X86_64_SSE_REGPARM_MAX;
8244 for (i = cum->sse_regno; i < max; ++i)
8246 mem = plus_constant (Pmode, save_area,
8247 i * 16 + ix86_varargs_gpr_size);
8248 mem = gen_rtx_MEM (smode, mem);
8249 MEM_NOTRAP_P (mem) = 1;
8250 set_mem_alias_set (mem, set);
8251 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8253 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8256 emit_label (label);
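/* Illustrative sketch (editor's addition, not from the original source): the
   save area written above is laid out as X86_64_REGPARM_MAX (6) word-sized
   GPR slots, i.e. 48 bytes for rdi, rsi, rdx, rcx, r8 and r9, followed by up
   to X86_64_SSE_REGPARM_MAX (8) 16-byte slots for xmm0-xmm7.  The gp_offset
   and fp_offset fields initialized in ix86_va_start below index into this
   block.  */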
8260 static void
8261 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8263 alias_set_type set = get_varargs_alias_set ();
8264 int i;
8266 /* Reset to zero, as there might be a sysv vaarg used
8267 before. */
8268 ix86_varargs_gpr_size = 0;
8269 ix86_varargs_fpr_size = 0;
8271 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8273 rtx reg, mem;
8275 mem = gen_rtx_MEM (Pmode,
8276 plus_constant (Pmode, virtual_incoming_args_rtx,
8277 i * UNITS_PER_WORD));
8278 MEM_NOTRAP_P (mem) = 1;
8279 set_mem_alias_set (mem, set);
8281 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8282 emit_move_insn (mem, reg);
8286 static void
8287 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8288 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8289 int no_rtl)
8291 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8292 CUMULATIVE_ARGS next_cum;
8293 tree fntype;
8295 /* This argument doesn't appear to be used anymore. Which is good,
8296 because the old code here didn't suppress rtl generation. */
8297 gcc_assert (!no_rtl);
8299 if (!TARGET_64BIT)
8300 return;
8302 fntype = TREE_TYPE (current_function_decl);
8304 /* For varargs, we do not want to skip the dummy va_dcl argument.
8305 For stdargs, we do want to skip the last named argument. */
8306 next_cum = *cum;
8307 if (stdarg_p (fntype))
8308 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8309 true);
8311 if (cum->call_abi == MS_ABI)
8312 setup_incoming_varargs_ms_64 (&next_cum);
8313 else
8314 setup_incoming_varargs_64 (&next_cum);
8317 /* Checks if TYPE is of kind va_list char *. */
8319 static bool
8320 is_va_list_char_pointer (tree type)
8322 tree canonic;
8324 /* For 32-bit it is always true. */
8325 if (!TARGET_64BIT)
8326 return true;
8327 canonic = ix86_canonical_va_list_type (type);
8328 return (canonic == ms_va_list_type_node
8329 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8332 /* Implement va_start. */
8334 static void
8335 ix86_va_start (tree valist, rtx nextarg)
8337 HOST_WIDE_INT words, n_gpr, n_fpr;
8338 tree f_gpr, f_fpr, f_ovf, f_sav;
8339 tree gpr, fpr, ovf, sav, t;
8340 tree type;
8341 rtx ovf_rtx;
8343 if (flag_split_stack
8344 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8346 unsigned int scratch_regno;
8348 /* When we are splitting the stack, we can't refer to the stack
8349 arguments using internal_arg_pointer, because they may be on
8350 the old stack. The split stack prologue will arrange to
8351 leave a pointer to the old stack arguments in a scratch
8352 register, which we here copy to a pseudo-register. The split
8353 stack prologue can't set the pseudo-register directly because
8354 it (the prologue) runs before any registers have been saved. */
8356 scratch_regno = split_stack_prologue_scratch_regno ();
8357 if (scratch_regno != INVALID_REGNUM)
8359 rtx reg, seq;
8361 reg = gen_reg_rtx (Pmode);
8362 cfun->machine->split_stack_varargs_pointer = reg;
8364 start_sequence ();
8365 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8366 seq = get_insns ();
8367 end_sequence ();
8369 push_topmost_sequence ();
8370 emit_insn_after (seq, entry_of_function ());
8371 pop_topmost_sequence ();
8375 /* Only 64bit target needs something special. */
8376 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8378 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8379 std_expand_builtin_va_start (valist, nextarg);
8380 else
8382 rtx va_r, next;
8384 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8385 next = expand_binop (ptr_mode, add_optab,
8386 cfun->machine->split_stack_varargs_pointer,
8387 crtl->args.arg_offset_rtx,
8388 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8389 convert_move (va_r, next, 0);
8391 return;
8394 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8395 f_fpr = DECL_CHAIN (f_gpr);
8396 f_ovf = DECL_CHAIN (f_fpr);
8397 f_sav = DECL_CHAIN (f_ovf);
8399 valist = build_simple_mem_ref (valist);
8400 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8401 /* The following should be folded into the MEM_REF offset. */
8402 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8403 f_gpr, NULL_TREE);
8404 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8405 f_fpr, NULL_TREE);
8406 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8407 f_ovf, NULL_TREE);
8408 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8409 f_sav, NULL_TREE);
8411 /* Count number of gp and fp argument registers used. */
8412 words = crtl->args.info.words;
8413 n_gpr = crtl->args.info.regno;
8414 n_fpr = crtl->args.info.sse_regno;
8416 if (cfun->va_list_gpr_size)
8418 type = TREE_TYPE (gpr);
8419 t = build2 (MODIFY_EXPR, type,
8420 gpr, build_int_cst (type, n_gpr * 8));
8421 TREE_SIDE_EFFECTS (t) = 1;
8422 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8425 if (TARGET_SSE && cfun->va_list_fpr_size)
8427 type = TREE_TYPE (fpr);
8428 t = build2 (MODIFY_EXPR, type, fpr,
8429 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8430 TREE_SIDE_EFFECTS (t) = 1;
8431 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8434 /* Find the overflow area. */
8435 type = TREE_TYPE (ovf);
8436 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8437 ovf_rtx = crtl->args.internal_arg_pointer;
8438 else
8439 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8440 t = make_tree (type, ovf_rtx);
8441 if (words != 0)
8442 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8443 t = build2 (MODIFY_EXPR, type, ovf, t);
8444 TREE_SIDE_EFFECTS (t) = 1;
8445 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8447 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8449 /* Find the register save area.
8450 The function prologue saves it right above the stack frame. */
8451 type = TREE_TYPE (sav);
8452 t = make_tree (type, frame_pointer_rtx);
8453 if (!ix86_varargs_gpr_size)
8454 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8455 t = build2 (MODIFY_EXPR, type, sav, t);
8456 TREE_SIDE_EFFECTS (t) = 1;
8457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
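/* Worked example (editor's sketch, hypothetical prototype): for a variadic
   int f (int a, double b, ...) on 64-bit SysV, one GPR and one SSE register
   are consumed by the named arguments, so va_start stores gp_offset = 1*8 = 8
   and fp_offset = 1*16 + 8*X86_64_REGPARM_MAX = 64, points overflow_arg_area
   at the incoming stack arguments, and points reg_save_area at the block
   saved by setup_incoming_varargs_64 above.  */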
8461 /* Implement va_arg. */
8463 static tree
8464 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8465 gimple_seq *post_p)
8467 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8468 tree f_gpr, f_fpr, f_ovf, f_sav;
8469 tree gpr, fpr, ovf, sav, t;
8470 int size, rsize;
8471 tree lab_false, lab_over = NULL_TREE;
8472 tree addr, t2;
8473 rtx container;
8474 int indirect_p = 0;
8475 tree ptrtype;
8476 enum machine_mode nat_mode;
8477 unsigned int arg_boundary;
8479 /* Only 64bit target needs something special. */
8480 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8481 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8483 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8484 f_fpr = DECL_CHAIN (f_gpr);
8485 f_ovf = DECL_CHAIN (f_fpr);
8486 f_sav = DECL_CHAIN (f_ovf);
8488 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8489 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8490 valist = build_va_arg_indirect_ref (valist);
8491 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8492 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8493 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8495 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8496 if (indirect_p)
8497 type = build_pointer_type (type);
8498 size = int_size_in_bytes (type);
8499 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8501 nat_mode = type_natural_mode (type, NULL, false);
8502 switch (nat_mode)
8504 case V8SFmode:
8505 case V8SImode:
8506 case V32QImode:
8507 case V16HImode:
8508 case V4DFmode:
8509 case V4DImode:
8510 case V16SFmode:
8511 case V16SImode:
8512 case V64QImode:
8513 case V32HImode:
8514 case V8DFmode:
8515 case V8DImode:
8516 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8517 if (!TARGET_64BIT_MS_ABI)
8519 container = NULL;
8520 break;
8523 default:
8524 container = construct_container (nat_mode, TYPE_MODE (type),
8525 type, 0, X86_64_REGPARM_MAX,
8526 X86_64_SSE_REGPARM_MAX, intreg, 0);
8528 break;
8531 /* Pull the value out of the saved registers. */
8533 addr = create_tmp_var (ptr_type_node, "addr");
8535 if (container)
8537 int needed_intregs, needed_sseregs;
8538 bool need_temp;
8539 tree int_addr, sse_addr;
8541 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8542 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8544 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8546 need_temp = (!REG_P (container)
8547 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8548 || TYPE_ALIGN (type) > 128));
8550 /* If we are passing a structure, verify that it forms a consecutive block
8551 in the register save area. If not, we need to do moves. */
8552 if (!need_temp && !REG_P (container))
8554 /* Verify that all registers are strictly consecutive. */
8555 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8557 int i;
8559 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8561 rtx slot = XVECEXP (container, 0, i);
8562 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8563 || INTVAL (XEXP (slot, 1)) != i * 16)
8564 need_temp = 1;
8567 else
8569 int i;
8571 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8573 rtx slot = XVECEXP (container, 0, i);
8574 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8575 || INTVAL (XEXP (slot, 1)) != i * 8)
8576 need_temp = 1;
8580 if (!need_temp)
8582 int_addr = addr;
8583 sse_addr = addr;
8585 else
8587 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8588 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8591 /* First ensure that we fit completely in registers. */
8592 if (needed_intregs)
8594 t = build_int_cst (TREE_TYPE (gpr),
8595 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8596 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8597 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8598 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8599 gimplify_and_add (t, pre_p);
8601 if (needed_sseregs)
8603 t = build_int_cst (TREE_TYPE (fpr),
8604 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8605 + X86_64_REGPARM_MAX * 8);
8606 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8607 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8608 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8609 gimplify_and_add (t, pre_p);
8612 /* Compute index to start of area used for integer regs. */
8613 if (needed_intregs)
8615 /* int_addr = gpr + sav; */
8616 t = fold_build_pointer_plus (sav, gpr);
8617 gimplify_assign (int_addr, t, pre_p);
8619 if (needed_sseregs)
8621 /* sse_addr = fpr + sav; */
8622 t = fold_build_pointer_plus (sav, fpr);
8623 gimplify_assign (sse_addr, t, pre_p);
8625 if (need_temp)
8627 int i, prev_size = 0;
8628 tree temp = create_tmp_var (type, "va_arg_tmp");
8630 /* addr = &temp; */
8631 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8632 gimplify_assign (addr, t, pre_p);
8634 for (i = 0; i < XVECLEN (container, 0); i++)
8636 rtx slot = XVECEXP (container, 0, i);
8637 rtx reg = XEXP (slot, 0);
8638 enum machine_mode mode = GET_MODE (reg);
8639 tree piece_type;
8640 tree addr_type;
8641 tree daddr_type;
8642 tree src_addr, src;
8643 int src_offset;
8644 tree dest_addr, dest;
8645 int cur_size = GET_MODE_SIZE (mode);
8647 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8648 prev_size = INTVAL (XEXP (slot, 1));
8649 if (prev_size + cur_size > size)
8651 cur_size = size - prev_size;
8652 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8653 if (mode == BLKmode)
8654 mode = QImode;
8656 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8657 if (mode == GET_MODE (reg))
8658 addr_type = build_pointer_type (piece_type);
8659 else
8660 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8661 true);
8662 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8663 true);
8665 if (SSE_REGNO_P (REGNO (reg)))
8667 src_addr = sse_addr;
8668 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8670 else
8672 src_addr = int_addr;
8673 src_offset = REGNO (reg) * 8;
8675 src_addr = fold_convert (addr_type, src_addr);
8676 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8678 dest_addr = fold_convert (daddr_type, addr);
8679 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8680 if (cur_size == GET_MODE_SIZE (mode))
8682 src = build_va_arg_indirect_ref (src_addr);
8683 dest = build_va_arg_indirect_ref (dest_addr);
8685 gimplify_assign (dest, src, pre_p);
8687 else
8689 tree copy
8690 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8691 3, dest_addr, src_addr,
8692 size_int (cur_size));
8693 gimplify_and_add (copy, pre_p);
8695 prev_size += cur_size;
8699 if (needed_intregs)
8701 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8702 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8703 gimplify_assign (gpr, t, pre_p);
8706 if (needed_sseregs)
8708 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8709 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8710 gimplify_assign (fpr, t, pre_p);
8713 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8715 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8718 /* ... otherwise out of the overflow area. */
8720 /* When we align a parameter on the stack for the caller, if the parameter
8721 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8722 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match the callee
8723 with the caller here. */
8724 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8725 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8726 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8728 /* Care for on-stack alignment if needed. */
8729 if (arg_boundary <= 64 || size == 0)
8730 t = ovf;
8731 else
8733 HOST_WIDE_INT align = arg_boundary / 8;
8734 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8735 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8736 build_int_cst (TREE_TYPE (t), -align));
8739 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8740 gimplify_assign (addr, t, pre_p);
8742 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8743 gimplify_assign (unshare_expr (ovf), t, pre_p);
8745 if (container)
8746 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8748 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8749 addr = fold_convert (ptrtype, addr);
8751 if (indirect_p)
8752 addr = build_va_arg_indirect_ref (addr);
8753 return build_va_arg_indirect_ref (addr);
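/* As a rough illustration (a sketch, not the exact gimple), for a plain
   "int" argument under the 64-bit SysV ABI the sequence built above behaves
   like:

     if (gpr >= 6 * 8)
       goto overflow;            // all integer registers already used
     addr = sav + gpr;           // fetch from the register save area
     gpr += 8;
     goto done;
   overflow:
     addr = ovf;                 // fetch from the stack overflow area
     ovf = addr + 8;             // (rounded up first for alignment > 8)
   done:
     result = *(int *) addr;

   SSE-class arguments test fpr against the 16-byte slots of the save area
   instead, and aggregates needing several registers are first copied into
   a temporary so the pieces become contiguous.  */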
8756 /* Return true if OPNUM's MEM should be matched
8757 in movabs* patterns. */
8759 bool
8760 ix86_check_movabs (rtx insn, int opnum)
8762 rtx set, mem;
8764 set = PATTERN (insn);
8765 if (GET_CODE (set) == PARALLEL)
8766 set = XVECEXP (set, 0, 0);
8767 gcc_assert (GET_CODE (set) == SET);
8768 mem = XEXP (set, opnum);
8769 while (GET_CODE (mem) == SUBREG)
8770 mem = SUBREG_REG (mem);
8771 gcc_assert (MEM_P (mem));
8772 return volatile_ok || !MEM_VOLATILE_P (mem);
8775 /* Initialize the table of extra 80387 mathematical constants. */
8777 static void
8778 init_ext_80387_constants (void)
8780 static const char * cst[5] =
8782 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8783 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8784 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8785 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8786 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8788 int i;
8790 for (i = 0; i < 5; i++)
8792 real_from_string (&ext_80387_constants_table[i], cst[i]);
8793 /* Ensure each constant is rounded to XFmode precision. */
8794 real_convert (&ext_80387_constants_table[i],
8795 XFmode, &ext_80387_constants_table[i]);
8798 ext_80387_constants_init = 1;
8801 /* Return non-zero if the constant is something that
8802 can be loaded with a special instruction. */
8805 standard_80387_constant_p (rtx x)
8807 enum machine_mode mode = GET_MODE (x);
8809 REAL_VALUE_TYPE r;
8811 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8812 return -1;
8814 if (x == CONST0_RTX (mode))
8815 return 1;
8816 if (x == CONST1_RTX (mode))
8817 return 2;
8819 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8821 /* For XFmode constants, try to find a special 80387 instruction when
8822 optimizing for size or on those CPUs that benefit from them. */
8823 if (mode == XFmode
8824 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8826 int i;
8828 if (! ext_80387_constants_init)
8829 init_ext_80387_constants ();
8831 for (i = 0; i < 5; i++)
8832 if (real_identical (&r, &ext_80387_constants_table[i]))
8833 return i + 3;
8836 /* Load of the constant -0.0 or -1.0 will be split as
8837 fldz;fchs or fld1;fchs sequence. */
8838 if (real_isnegzero (&r))
8839 return 8;
8840 if (real_identical (&r, &dconstm1))
8841 return 9;
8843 return 0;
8846 /* Return the opcode of the special instruction to be used to load
8847 the constant X. */
8849 const char *
8850 standard_80387_constant_opcode (rtx x)
8852 switch (standard_80387_constant_p (x))
8854 case 1:
8855 return "fldz";
8856 case 2:
8857 return "fld1";
8858 case 3:
8859 return "fldlg2";
8860 case 4:
8861 return "fldln2";
8862 case 5:
8863 return "fldl2e";
8864 case 6:
8865 return "fldl2t";
8866 case 7:
8867 return "fldpi";
8868 case 8:
8869 case 9:
8870 return "#";
8871 default:
8872 gcc_unreachable ();
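/* For example, with the extended constants enabled, the XFmode value pi is
   recognized by standard_80387_constant_p above as 7 (table slot 4 plus 3),
   so the move pattern can emit a single "fldpi" rather than a load from the
   constant pool; 0.0 and 1.0 likewise map to "fldz" and "fld1".  */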
8876 /* Return the CONST_DOUBLE representing the 80387 constant that is
8877 loaded by the specified special instruction. The argument IDX
8878 matches the return value from standard_80387_constant_p. */
8881 standard_80387_constant_rtx (int idx)
8883 int i;
8885 if (! ext_80387_constants_init)
8886 init_ext_80387_constants ();
8888 switch (idx)
8890 case 3:
8891 case 4:
8892 case 5:
8893 case 6:
8894 case 7:
8895 i = idx - 3;
8896 break;
8898 default:
8899 gcc_unreachable ();
8902 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8903 XFmode);
8906 /* Return 1 if X is all 0s and 2 if X is all 1s
8907 in a supported SSE/AVX vector mode. */
8910 standard_sse_constant_p (rtx x)
8912 enum machine_mode mode = GET_MODE (x);
8914 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8915 return 1;
8916 if (vector_all_ones_operand (x, mode))
8917 switch (mode)
8919 case V16QImode:
8920 case V8HImode:
8921 case V4SImode:
8922 case V2DImode:
8923 if (TARGET_SSE2)
8924 return 2;
8925 case V32QImode:
8926 case V16HImode:
8927 case V8SImode:
8928 case V4DImode:
8929 if (TARGET_AVX2)
8930 return 2;
8931 case V64QImode:
8932 case V32HImode:
8933 case V16SImode:
8934 case V8DImode:
8935 if (TARGET_AVX512F)
8936 return 2;
8937 default:
8938 break;
8941 return 0;
8944 /* Return the opcode of the special instruction to be used to load
8945 the constant X. */
8947 const char *
8948 standard_sse_constant_opcode (rtx insn, rtx x)
8950 switch (standard_sse_constant_p (x))
8952 case 1:
8953 switch (get_attr_mode (insn))
8955 case MODE_XI:
8956 case MODE_V16SF:
8957 return "vpxord\t%g0, %g0, %g0";
8958 case MODE_V8DF:
8959 return "vpxorq\t%g0, %g0, %g0";
8960 case MODE_TI:
8961 return "%vpxor\t%0, %d0";
8962 case MODE_V2DF:
8963 return "%vxorpd\t%0, %d0";
8964 case MODE_V4SF:
8965 return "%vxorps\t%0, %d0";
8967 case MODE_OI:
8968 return "vpxor\t%x0, %x0, %x0";
8969 case MODE_V4DF:
8970 return "vxorpd\t%x0, %x0, %x0";
8971 case MODE_V8SF:
8972 return "vxorps\t%x0, %x0, %x0";
8974 default:
8975 break;
8978 case 2:
8979 if (get_attr_mode (insn) == MODE_XI
8980 || get_attr_mode (insn) == MODE_V8DF
8981 || get_attr_mode (insn) == MODE_V16SF)
8982 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8983 if (TARGET_AVX)
8984 return "vpcmpeqd\t%0, %0, %0";
8985 else
8986 return "pcmpeqd\t%0, %0";
8988 default:
8989 break;
8991 gcc_unreachable ();
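/* For example, a V4SImode vector of zeros (standard_sse_constant_p == 1) is
   materialized by a self-xor such as "pxor %xmm0, %xmm0", and a vector of
   all ones (== 2) by "pcmpeqd %xmm0, %xmm0", "vpcmpeqd" under AVX, or the
   vpternlogd form for 512-bit modes, so no constant-pool load is needed.  */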
8994 /* Returns true if OP contains a symbol reference */
8996 bool
8997 symbolic_reference_mentioned_p (rtx op)
8999 const char *fmt;
9000 int i;
9002 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9003 return true;
9005 fmt = GET_RTX_FORMAT (GET_CODE (op));
9006 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9008 if (fmt[i] == 'E')
9010 int j;
9012 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9013 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9014 return true;
9017 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9018 return true;
9021 return false;
9024 /* Return true if it is appropriate to emit `ret' instructions in the
9025 body of a function. Do this only if the epilogue is simple, needing a
9026 couple of insns. Prior to reloading, we can't tell how many registers
9027 must be saved, so return false then. Return false if there is no frame
9028 marker to de-allocate. */
9030 bool
9031 ix86_can_use_return_insn_p (void)
9033 struct ix86_frame frame;
9035 if (! reload_completed || frame_pointer_needed)
9036 return 0;
9038 /* Don't allow more than 32k pop, since that's all we can do
9039 with one instruction. */
9040 if (crtl->args.pops_args && crtl->args.size >= 32768)
9041 return 0;
9043 ix86_compute_frame_layout (&frame);
9044 return (frame.stack_pointer_offset == UNITS_PER_WORD
9045 && (frame.nregs + frame.nsseregs) == 0);
9048 /* Value should be nonzero if functions must have frame pointers.
9049 Zero means the frame pointer need not be set up (and parms may
9050 be accessed via the stack pointer) in functions that seem suitable. */
9052 static bool
9053 ix86_frame_pointer_required (void)
9055 /* If we accessed previous frames, then the generated code expects
9056 to be able to access the saved ebp value in our frame. */
9057 if (cfun->machine->accesses_prev_frame)
9058 return true;
9060 /* Several x86 OSes need a frame pointer for other reasons,
9061 usually pertaining to setjmp. */
9062 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9063 return true;
9065 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9066 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9067 return true;
9069 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9070 stack allocation is 4GB. */
9071 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9072 return true;
9074 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9075 turns off the frame pointer by default. Turn it back on now if
9076 we've not got a leaf function. */
9077 if (TARGET_OMIT_LEAF_FRAME_POINTER
9078 && (!crtl->is_leaf
9079 || ix86_current_function_calls_tls_descriptor))
9080 return true;
9082 if (crtl->profile && !flag_fentry)
9083 return true;
9085 return false;
9088 /* Record that the current function accesses previous call frames. */
9090 void
9091 ix86_setup_frame_addresses (void)
9093 cfun->machine->accesses_prev_frame = 1;
9096 #ifndef USE_HIDDEN_LINKONCE
9097 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9098 # define USE_HIDDEN_LINKONCE 1
9099 # else
9100 # define USE_HIDDEN_LINKONCE 0
9101 # endif
9102 #endif
9104 static int pic_labels_used;
9106 /* Fills in the label name that should be used for a pc thunk for
9107 the given register. */
9109 static void
9110 get_pc_thunk_name (char name[32], unsigned int regno)
9112 gcc_assert (!TARGET_64BIT);
9114 if (USE_HIDDEN_LINKONCE)
9115 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9116 else
9117 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9121 /* This function generates code for -fpic that loads %ebx with
9122 the return address of the caller and then returns. */
9124 static void
9125 ix86_code_end (void)
9127 rtx xops[2];
9128 int regno;
9130 for (regno = AX_REG; regno <= SP_REG; regno++)
9132 char name[32];
9133 tree decl;
9135 if (!(pic_labels_used & (1 << regno)))
9136 continue;
9138 get_pc_thunk_name (name, regno);
9140 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9141 get_identifier (name),
9142 build_function_type_list (void_type_node, NULL_TREE));
9143 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9144 NULL_TREE, void_type_node);
9145 TREE_PUBLIC (decl) = 1;
9146 TREE_STATIC (decl) = 1;
9147 DECL_IGNORED_P (decl) = 1;
9149 #if TARGET_MACHO
9150 if (TARGET_MACHO)
9152 switch_to_section (darwin_sections[text_coal_section]);
9153 fputs ("\t.weak_definition\t", asm_out_file);
9154 assemble_name (asm_out_file, name);
9155 fputs ("\n\t.private_extern\t", asm_out_file);
9156 assemble_name (asm_out_file, name);
9157 putc ('\n', asm_out_file);
9158 ASM_OUTPUT_LABEL (asm_out_file, name);
9159 DECL_WEAK (decl) = 1;
9161 else
9162 #endif
9163 if (USE_HIDDEN_LINKONCE)
9165 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9167 targetm.asm_out.unique_section (decl, 0);
9168 switch_to_section (get_named_section (decl, NULL, 0));
9170 targetm.asm_out.globalize_label (asm_out_file, name);
9171 fputs ("\t.hidden\t", asm_out_file);
9172 assemble_name (asm_out_file, name);
9173 putc ('\n', asm_out_file);
9174 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9176 else
9178 switch_to_section (text_section);
9179 ASM_OUTPUT_LABEL (asm_out_file, name);
9182 DECL_INITIAL (decl) = make_node (BLOCK);
9183 current_function_decl = decl;
9184 init_function_start (decl);
9185 first_function_block_is_cold = false;
9186 /* Make sure unwind info is emitted for the thunk if needed. */
9187 final_start_function (emit_barrier (), asm_out_file, 1);
9189 /* Pad stack IP move with 4 instructions (two NOPs count
9190 as one instruction). */
9191 if (TARGET_PAD_SHORT_FUNCTION)
9193 int i = 8;
9195 while (i--)
9196 fputs ("\tnop\n", asm_out_file);
9199 xops[0] = gen_rtx_REG (Pmode, regno);
9200 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9201 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9202 fputs ("\tret\n", asm_out_file);
9203 final_end_function ();
9204 init_insn_lengths ();
9205 free_after_compilation (cfun);
9206 set_cfun (NULL);
9207 current_function_decl = NULL;
9210 if (flag_split_stack)
9211 file_end_indicate_split_stack ();
9214 /* Emit code for the SET_GOT patterns. */
9216 const char *
9217 output_set_got (rtx dest, rtx label)
9219 rtx xops[3];
9221 xops[0] = dest;
9223 if (TARGET_VXWORKS_RTP && flag_pic)
9225 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9226 xops[2] = gen_rtx_MEM (Pmode,
9227 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9228 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9230 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9231 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9232 an unadorned address. */
9233 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9234 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9235 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9236 return "";
9239 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9241 if (!flag_pic)
9243 if (TARGET_MACHO)
9244 /* We don't need a pic base, we're not producing pic. */
9245 gcc_unreachable ();
9247 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9248 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9249 targetm.asm_out.internal_label (asm_out_file, "L",
9250 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9252 else
9254 char name[32];
9255 get_pc_thunk_name (name, REGNO (dest));
9256 pic_labels_used |= 1 << REGNO (dest);
9258 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9259 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9260 output_asm_insn ("call\t%X2", xops);
9262 #if TARGET_MACHO
9263 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9264 This is what will be referenced by the Mach-O PIC subsystem. */
9265 if (machopic_should_output_picbase_label () || !label)
9266 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9268 /* When we are restoring the pic base at the site of a nonlocal label,
9269 and we decided to emit the pic base above, we will still output a
9270 local label used for calculating the correction offset (even though
9271 the offset will be 0 in that case). */
9272 if (label)
9273 targetm.asm_out.internal_label (asm_out_file, "L",
9274 CODE_LABEL_NUMBER (label));
9275 #endif
9278 if (!TARGET_MACHO)
9279 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9281 return "";
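/* Taken together with ix86_code_end above, the 32-bit PIC setup emitted for
   %ebx typically looks like

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body is simply

   __x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   leaving %ebx pointing at the GOT (the exact labels depend on the target
   configuration).  */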
9284 /* Generate a "push" pattern for input ARG. */
9286 static rtx
9287 gen_push (rtx arg)
9289 struct machine_function *m = cfun->machine;
9291 if (m->fs.cfa_reg == stack_pointer_rtx)
9292 m->fs.cfa_offset += UNITS_PER_WORD;
9293 m->fs.sp_offset += UNITS_PER_WORD;
9295 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9296 arg = gen_rtx_REG (word_mode, REGNO (arg));
9298 return gen_rtx_SET (VOIDmode,
9299 gen_rtx_MEM (word_mode,
9300 gen_rtx_PRE_DEC (Pmode,
9301 stack_pointer_rtx)),
9302 arg);
9305 /* Generate a "pop" pattern for input ARG. */
9307 static rtx
9308 gen_pop (rtx arg)
9310 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9311 arg = gen_rtx_REG (word_mode, REGNO (arg));
9313 return gen_rtx_SET (VOIDmode,
9314 arg,
9315 gen_rtx_MEM (word_mode,
9316 gen_rtx_POST_INC (Pmode,
9317 stack_pointer_rtx)));
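/* On a 64-bit target, gen_push for %rdi therefore produces RTL along the
   lines of

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and gen_pop the mirror image

     (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   note that only gen_push updates the cfun->machine->fs bookkeeping.  */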
9320 /* Return >= 0 if there is an unused call-clobbered register available
9321 for the entire function. */
9323 static unsigned int
9324 ix86_select_alt_pic_regnum (void)
9326 if (crtl->is_leaf
9327 && !crtl->profile
9328 && !ix86_current_function_calls_tls_descriptor)
9330 int i, drap;
9331 /* Can't use the same register for both PIC and DRAP. */
9332 if (crtl->drap_reg)
9333 drap = REGNO (crtl->drap_reg);
9334 else
9335 drap = -1;
9336 for (i = 2; i >= 0; --i)
9337 if (i != drap && !df_regs_ever_live_p (i))
9338 return i;
9341 return INVALID_REGNUM;
9344 /* Return TRUE if we need to save REGNO. */
9346 static bool
9347 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9349 if (pic_offset_table_rtx
9350 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9351 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9352 || crtl->profile
9353 || crtl->calls_eh_return
9354 || crtl->uses_const_pool
9355 || cfun->has_nonlocal_label))
9356 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9358 if (crtl->calls_eh_return && maybe_eh_return)
9360 unsigned i;
9361 for (i = 0; ; i++)
9363 unsigned test = EH_RETURN_DATA_REGNO (i);
9364 if (test == INVALID_REGNUM)
9365 break;
9366 if (test == regno)
9367 return true;
9371 if (crtl->drap_reg
9372 && regno == REGNO (crtl->drap_reg)
9373 && !cfun->machine->no_drap_save_restore)
9374 return true;
9376 return (df_regs_ever_live_p (regno)
9377 && !call_used_regs[regno]
9378 && !fixed_regs[regno]
9379 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9382 /* Return the number of saved general purpose registers. */
9384 static int
9385 ix86_nsaved_regs (void)
9387 int nregs = 0;
9388 int regno;
9390 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9391 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9392 nregs ++;
9393 return nregs;
9396 /* Return the number of saved SSE registers. */
9398 static int
9399 ix86_nsaved_sseregs (void)
9401 int nregs = 0;
9402 int regno;
9404 if (!TARGET_64BIT_MS_ABI)
9405 return 0;
9406 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9407 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9408 nregs ++;
9409 return nregs;
9412 /* Given FROM and TO register numbers, say whether this elimination is
9413 allowed. If stack alignment is needed, we can only replace argument
9414 pointer with hard frame pointer, or replace frame pointer with stack
9415 pointer. Otherwise, frame pointer elimination is automatically
9416 handled and all other eliminations are valid. */
9418 static bool
9419 ix86_can_eliminate (const int from, const int to)
9421 if (stack_realign_fp)
9422 return ((from == ARG_POINTER_REGNUM
9423 && to == HARD_FRAME_POINTER_REGNUM)
9424 || (from == FRAME_POINTER_REGNUM
9425 && to == STACK_POINTER_REGNUM));
9426 else
9427 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9430 /* Return the offset between two registers, one to be eliminated, and the other
9431 its replacement, at the start of a routine. */
9433 HOST_WIDE_INT
9434 ix86_initial_elimination_offset (int from, int to)
9436 struct ix86_frame frame;
9437 ix86_compute_frame_layout (&frame);
9439 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9440 return frame.hard_frame_pointer_offset;
9441 else if (from == FRAME_POINTER_REGNUM
9442 && to == HARD_FRAME_POINTER_REGNUM)
9443 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9444 else
9446 gcc_assert (to == STACK_POINTER_REGNUM);
9448 if (from == ARG_POINTER_REGNUM)
9449 return frame.stack_pointer_offset;
9451 gcc_assert (from == FRAME_POINTER_REGNUM);
9452 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9456 /* In a dynamically-aligned function, we can't know the offset from
9457 stack pointer to frame pointer, so we must ensure that setjmp
9458 eliminates fp against the hard fp (%ebp) rather than trying to
9459 index from %esp up to the top of the frame across a gap that is
9460 of unknown (at compile-time) size. */
9461 static rtx
9462 ix86_builtin_setjmp_frame_value (void)
9464 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9467 /* When using -fsplit-stack, the allocation routines set a field in
9468 the TCB to the bottom of the stack plus this much space, measured
9469 in bytes. */
9471 #define SPLIT_STACK_AVAILABLE 256
9473 /* Fill structure ix86_frame about frame of currently computed function. */
9475 static void
9476 ix86_compute_frame_layout (struct ix86_frame *frame)
9478 unsigned HOST_WIDE_INT stack_alignment_needed;
9479 HOST_WIDE_INT offset;
9480 unsigned HOST_WIDE_INT preferred_alignment;
9481 HOST_WIDE_INT size = get_frame_size ();
9482 HOST_WIDE_INT to_allocate;
9484 frame->nregs = ix86_nsaved_regs ();
9485 frame->nsseregs = ix86_nsaved_sseregs ();
9487 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9488 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9490 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9491 except for function prologues and leaf functions. */
9492 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9493 && (!crtl->is_leaf || cfun->calls_alloca != 0
9494 || ix86_current_function_calls_tls_descriptor))
9496 preferred_alignment = 16;
9497 stack_alignment_needed = 16;
9498 crtl->preferred_stack_boundary = 128;
9499 crtl->stack_alignment_needed = 128;
9502 gcc_assert (!size || stack_alignment_needed);
9503 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9504 gcc_assert (preferred_alignment <= stack_alignment_needed);
9506 /* For SEH we have to limit the amount of code movement into the prologue.
9507 At present we do this via a BLOCKAGE, at which point there's very little
9508 scheduling that can be done, which means that there's very little point
9509 in doing anything except PUSHs. */
9510 if (TARGET_SEH)
9511 cfun->machine->use_fast_prologue_epilogue = false;
9513 /* During the reload iteration the number of registers saved can change.
9514 Recompute the value as needed. Do not recompute when the number of registers
9515 didn't change, as reload does multiple calls to the function and does not
9516 expect the decision to change within a single iteration. */
9517 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9518 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9520 int count = frame->nregs;
9521 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9523 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9525 /* The fast prologue uses move instead of push to save registers. This
9526 is significantly longer, but also executes faster as modern hardware
9527 can execute the moves in parallel, but can't do that for push/pop.
9529 Be careful about choosing which prologue to emit: when the function takes
9530 many instructions to execute we may use the slow version, as well as when
9531 the function is known to be outside a hot spot (this is known with
9532 feedback only). Weight the size of the function by the number of registers
9533 to save, as it is cheap to use one or two push instructions but very
9534 slow to use many of them. */
9535 if (count)
9536 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9537 if (node->frequency < NODE_FREQUENCY_NORMAL
9538 || (flag_branch_probabilities
9539 && node->frequency < NODE_FREQUENCY_HOT))
9540 cfun->machine->use_fast_prologue_epilogue = false;
9541 else
9542 cfun->machine->use_fast_prologue_epilogue
9543 = !expensive_function_p (count);
9546 frame->save_regs_using_mov
9547 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9548 /* If static stack checking is enabled and done with probes,
9549 the registers need to be saved before allocating the frame. */
9550 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9552 /* Skip return address. */
9553 offset = UNITS_PER_WORD;
9555 /* Skip pushed static chain. */
9556 if (ix86_static_chain_on_stack)
9557 offset += UNITS_PER_WORD;
9559 /* Skip saved base pointer. */
9560 if (frame_pointer_needed)
9561 offset += UNITS_PER_WORD;
9562 frame->hfp_save_offset = offset;
9564 /* The traditional frame pointer location is at the top of the frame. */
9565 frame->hard_frame_pointer_offset = offset;
9567 /* Register save area */
9568 offset += frame->nregs * UNITS_PER_WORD;
9569 frame->reg_save_offset = offset;
9571 /* On SEH targets, registers are pushed just before the frame pointer
9572 location. */
9573 if (TARGET_SEH)
9574 frame->hard_frame_pointer_offset = offset;
9576 /* Align and set SSE register save area. */
9577 if (frame->nsseregs)
9579 /* The only ABI that has saved SSE registers (Win64) also has a
9580 16-byte aligned default stack, and thus we don't need to be
9581 within the re-aligned local stack frame to save them. */
9582 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9583 offset = (offset + 16 - 1) & -16;
9584 offset += frame->nsseregs * 16;
9586 frame->sse_reg_save_offset = offset;
9588 /* The re-aligned stack starts here. Values before this point are not
9589 directly comparable with values below this point. In order to make
9590 sure that no value happens to be the same before and after, force
9591 the alignment computation below to add a non-zero value. */
9592 if (stack_realign_fp)
9593 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9595 /* Va-arg area */
9596 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9597 offset += frame->va_arg_size;
9599 /* Align start of frame for local function. */
9600 if (stack_realign_fp
9601 || offset != frame->sse_reg_save_offset
9602 || size != 0
9603 || !crtl->is_leaf
9604 || cfun->calls_alloca
9605 || ix86_current_function_calls_tls_descriptor)
9606 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9608 /* Frame pointer points here. */
9609 frame->frame_pointer_offset = offset;
9611 offset += size;
9613 /* Add the outgoing arguments area. It can be skipped if we eliminated
9614 all the function calls as dead code.
9615 Skipping is however impossible when the function calls alloca, as the
9616 alloca expander assumes that the last crtl->outgoing_args_size bytes
9617 of the stack frame are unused. */
9618 if (ACCUMULATE_OUTGOING_ARGS
9619 && (!crtl->is_leaf || cfun->calls_alloca
9620 || ix86_current_function_calls_tls_descriptor))
9622 offset += crtl->outgoing_args_size;
9623 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9625 else
9626 frame->outgoing_arguments_size = 0;
9628 /* Align stack boundary. Only needed if we're calling another function
9629 or using alloca. */
9630 if (!crtl->is_leaf || cfun->calls_alloca
9631 || ix86_current_function_calls_tls_descriptor)
9632 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9634 /* We've reached end of stack frame. */
9635 frame->stack_pointer_offset = offset;
9637 /* Size prologue needs to allocate. */
9638 to_allocate = offset - frame->sse_reg_save_offset;
9640 if ((!to_allocate && frame->nregs <= 1)
9641 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9642 frame->save_regs_using_mov = false;
9644 if (ix86_using_red_zone ()
9645 && crtl->sp_is_unchanging
9646 && crtl->is_leaf
9647 && !ix86_current_function_calls_tls_descriptor)
9649 frame->red_zone_size = to_allocate;
9650 if (frame->save_regs_using_mov)
9651 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9652 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9653 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9655 else
9656 frame->red_zone_size = 0;
9657 frame->stack_pointer_offset -= frame->red_zone_size;
9659 /* The SEH frame pointer location is near the bottom of the frame.
9660 This is enforced by the fact that the difference between the
9661 stack pointer and the frame pointer is limited to 240 bytes in
9662 the unwind data structure. */
9663 if (TARGET_SEH)
9665 HOST_WIDE_INT diff;
9667 /* If we can leave the frame pointer where it is, do so. Also, returns
9668 the establisher frame for __builtin_frame_address (0). */
9669 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9670 if (diff <= SEH_MAX_FRAME_SIZE
9671 && (diff > 240 || (diff & 15) != 0)
9672 && !crtl->accesses_prior_frames)
9674 /* Ideally we'd determine what portion of the local stack frame
9675 (within the constraint of the lowest 240) is most heavily used.
9676 But without that complication, simply bias the frame pointer
9677 by 128 bytes so as to maximize the amount of the local stack
9678 frame that is addressable with 8-bit offsets. */
9679 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
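/* Putting the offsets computed above together, the resulting frame looks
   roughly like this, from higher to lower addresses:

	return address
	pushed static chain (optional)
	saved frame pointer (when needed)
	saved general purpose registers
	saved SSE registers (64-bit MS ABI only)
	va_arg register save area
	alignment padding
	local variables
	outgoing argument area

   frame_pointer_offset marks the start of the local variables and
   stack_pointer_offset the end of the whole frame; the red zone, when
   usable, is carved out below stack_pointer_offset.  */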
9684 /* This is semi-inlined memory_address_length, but simplified
9685 since we know that we're always dealing with reg+offset, and
9686 to avoid having to create and discard all that rtl. */
9688 static inline int
9689 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9691 int len = 4;
9693 if (offset == 0)
9695 /* EBP and R13 cannot be encoded without an offset. */
9696 len = (regno == BP_REG || regno == R13_REG);
9698 else if (IN_RANGE (offset, -128, 127))
9699 len = 1;
9701 /* ESP and R12 must be encoded with a SIB byte. */
9702 if (regno == SP_REG || regno == R12_REG)
9703 len++;
9705 return len;
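/* A few concrete cases: %rbx with offset 0 needs neither a displacement nor
   a SIB byte (length 0); %rbp with offset 0 still needs a one-byte
   displacement (length 1); %rsp with offset -72 needs a one-byte
   displacement plus a SIB byte (length 2); and any offset outside
   [-128, 127] costs a four-byte displacement.  */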
9708 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9709 The valid base registers are taken from CFUN->MACHINE->FS. */
9711 static rtx
9712 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9714 const struct machine_function *m = cfun->machine;
9715 rtx base_reg = NULL;
9716 HOST_WIDE_INT base_offset = 0;
9718 if (m->use_fast_prologue_epilogue)
9720 /* Choose the base register most likely to allow the most scheduling
9721 opportunities. Generally FP is valid throughout the function,
9722 while DRAP must be reloaded within the epilogue. But choose either
9723 over the SP due to increased encoding size. */
9725 if (m->fs.fp_valid)
9727 base_reg = hard_frame_pointer_rtx;
9728 base_offset = m->fs.fp_offset - cfa_offset;
9730 else if (m->fs.drap_valid)
9732 base_reg = crtl->drap_reg;
9733 base_offset = 0 - cfa_offset;
9735 else if (m->fs.sp_valid)
9737 base_reg = stack_pointer_rtx;
9738 base_offset = m->fs.sp_offset - cfa_offset;
9741 else
9743 HOST_WIDE_INT toffset;
9744 int len = 16, tlen;
9746 /* Choose the base register with the smallest address encoding.
9747 With a tie, choose FP > DRAP > SP. */
9748 if (m->fs.sp_valid)
9750 base_reg = stack_pointer_rtx;
9751 base_offset = m->fs.sp_offset - cfa_offset;
9752 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9754 if (m->fs.drap_valid)
9756 toffset = 0 - cfa_offset;
9757 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9758 if (tlen <= len)
9760 base_reg = crtl->drap_reg;
9761 base_offset = toffset;
9762 len = tlen;
9765 if (m->fs.fp_valid)
9767 toffset = m->fs.fp_offset - cfa_offset;
9768 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9769 if (tlen <= len)
9771 base_reg = hard_frame_pointer_rtx;
9772 base_offset = toffset;
9773 len = tlen;
9777 gcc_assert (base_reg != NULL);
9779 return plus_constant (Pmode, base_reg, base_offset);
9782 /* Emit code to save registers in the prologue. */
9784 static void
9785 ix86_emit_save_regs (void)
9787 unsigned int regno;
9788 rtx insn;
9790 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9791 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9793 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9794 RTX_FRAME_RELATED_P (insn) = 1;
9798 /* Emit a single register save at CFA - CFA_OFFSET. */
9800 static void
9801 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9802 HOST_WIDE_INT cfa_offset)
9804 struct machine_function *m = cfun->machine;
9805 rtx reg = gen_rtx_REG (mode, regno);
9806 rtx mem, addr, base, insn;
9808 addr = choose_baseaddr (cfa_offset);
9809 mem = gen_frame_mem (mode, addr);
9811 /* For SSE saves, we need to indicate the 128-bit alignment. */
9812 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9814 insn = emit_move_insn (mem, reg);
9815 RTX_FRAME_RELATED_P (insn) = 1;
9817 base = addr;
9818 if (GET_CODE (base) == PLUS)
9819 base = XEXP (base, 0);
9820 gcc_checking_assert (REG_P (base));
9822 /* When saving registers into a re-aligned local stack frame, avoid
9823 any tricky guessing by dwarf2out. */
9824 if (m->fs.realigned)
9826 gcc_checking_assert (stack_realign_drap);
9828 if (regno == REGNO (crtl->drap_reg))
9830 /* A bit of a hack. We force the DRAP register to be saved in
9831 the re-aligned stack frame, which provides us with a copy
9832 of the CFA that will last past the prologue. Install it. */
9833 gcc_checking_assert (cfun->machine->fs.fp_valid);
9834 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9835 cfun->machine->fs.fp_offset - cfa_offset);
9836 mem = gen_rtx_MEM (mode, addr);
9837 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9839 else
9841 /* The frame pointer is a stable reference within the
9842 aligned frame. Use it. */
9843 gcc_checking_assert (cfun->machine->fs.fp_valid);
9844 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9845 cfun->machine->fs.fp_offset - cfa_offset);
9846 mem = gen_rtx_MEM (mode, addr);
9847 add_reg_note (insn, REG_CFA_EXPRESSION,
9848 gen_rtx_SET (VOIDmode, mem, reg));
9852 /* The memory may not be relative to the current CFA register,
9853 which means that we may need to generate a new pattern for
9854 use by the unwind info. */
9855 else if (base != m->fs.cfa_reg)
9857 addr = plus_constant (Pmode, m->fs.cfa_reg,
9858 m->fs.cfa_offset - cfa_offset);
9859 mem = gen_rtx_MEM (mode, addr);
9860 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9864 /* Emit code to save registers using MOV insns.
9865 First register is stored at CFA - CFA_OFFSET. */
9866 static void
9867 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9869 unsigned int regno;
9871 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9872 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9874 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9875 cfa_offset -= UNITS_PER_WORD;
9879 /* Emit code to save SSE registers using MOV insns.
9880 First register is stored at CFA - CFA_OFFSET. */
9881 static void
9882 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9884 unsigned int regno;
9886 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9887 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9889 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9890 cfa_offset -= 16;
9894 static GTY(()) rtx queued_cfa_restores;
9896 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9897 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9898 Don't add the note if the previously saved value will be left untouched
9899 within stack red-zone till return, as unwinders can find the same value
9900 in the register and on the stack. */
9902 static void
9903 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9905 if (!crtl->shrink_wrapped
9906 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9907 return;
9909 if (insn)
9911 add_reg_note (insn, REG_CFA_RESTORE, reg);
9912 RTX_FRAME_RELATED_P (insn) = 1;
9914 else
9915 queued_cfa_restores
9916 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9919 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9921 static void
9922 ix86_add_queued_cfa_restore_notes (rtx insn)
9924 rtx last;
9925 if (!queued_cfa_restores)
9926 return;
9927 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9929 XEXP (last, 1) = REG_NOTES (insn);
9930 REG_NOTES (insn) = queued_cfa_restores;
9931 queued_cfa_restores = NULL_RTX;
9932 RTX_FRAME_RELATED_P (insn) = 1;
9935 /* Expand prologue or epilogue stack adjustment.
9936 The pattern exists to put a dependency on all ebp-based memory accesses.
9937 STYLE should be negative if instructions should be marked as frame related,
9938 zero if %r11 register is live and cannot be freely used and positive
9939 otherwise. */
9941 static void
9942 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9943 int style, bool set_cfa)
9945 struct machine_function *m = cfun->machine;
9946 rtx insn;
9947 bool add_frame_related_expr = false;
9949 if (Pmode == SImode)
9950 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9951 else if (x86_64_immediate_operand (offset, DImode))
9952 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9953 else
9955 rtx tmp;
9956 /* r11 is used by indirect sibcall return as well, set before the
9957 epilogue and used after the epilogue. */
9958 if (style)
9959 tmp = gen_rtx_REG (DImode, R11_REG);
9960 else
9962 gcc_assert (src != hard_frame_pointer_rtx
9963 && dest != hard_frame_pointer_rtx);
9964 tmp = hard_frame_pointer_rtx;
9966 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9967 if (style < 0)
9968 add_frame_related_expr = true;
9970 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9973 insn = emit_insn (insn);
9974 if (style >= 0)
9975 ix86_add_queued_cfa_restore_notes (insn);
9977 if (set_cfa)
9979 rtx r;
9981 gcc_assert (m->fs.cfa_reg == src);
9982 m->fs.cfa_offset += INTVAL (offset);
9983 m->fs.cfa_reg = dest;
9985 r = gen_rtx_PLUS (Pmode, src, offset);
9986 r = gen_rtx_SET (VOIDmode, dest, r);
9987 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9988 RTX_FRAME_RELATED_P (insn) = 1;
9990 else if (style < 0)
9992 RTX_FRAME_RELATED_P (insn) = 1;
9993 if (add_frame_related_expr)
9995 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9996 r = gen_rtx_SET (VOIDmode, dest, r);
9997 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10001 if (dest == stack_pointer_rtx)
10003 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10004 bool valid = m->fs.sp_valid;
10006 if (src == hard_frame_pointer_rtx)
10008 valid = m->fs.fp_valid;
10009 ooffset = m->fs.fp_offset;
10011 else if (src == crtl->drap_reg)
10013 valid = m->fs.drap_valid;
10014 ooffset = 0;
10016 else
10018 /* Else there are two possibilities: SP itself, which we set
10019 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10020 taken care of by hand along the eh_return path. */
10021 gcc_checking_assert (src == stack_pointer_rtx
10022 || offset == const0_rtx);
10025 m->fs.sp_offset = ooffset - INTVAL (offset);
10026 m->fs.sp_valid = valid;
10030 /* Find an available register to be used as the dynamic realign argument
10031 pointer register. Such a register will be written in the prologue and
10032 used at the beginning of the body, so it must not be
10033 1. a parameter passing register.
10034 2. the GOT pointer.
10035 We reuse the static-chain register if it is available. Otherwise, we
10036 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10037 shorter encoding.
10039 Return: the regno of the chosen register. */
10041 static unsigned int
10042 find_drap_reg (void)
10044 tree decl = cfun->decl;
10046 if (TARGET_64BIT)
10048 /* Use R13 for a nested function or a function that needs a static chain.
10049 Since a function with a tail call may use any caller-saved
10050 registers in the epilogue, DRAP must not use a caller-saved
10051 register in such a case. */
10052 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10053 return R13_REG;
10055 return R10_REG;
10057 else
10059 /* Use DI for a nested function or a function that needs a static chain.
10060 Since a function with a tail call may use any caller-saved
10061 registers in the epilogue, DRAP must not use a caller-saved
10062 register in such a case. */
10063 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10064 return DI_REG;
10066 /* Reuse static chain register if it isn't used for parameter
10067 passing. */
10068 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10070 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10071 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10072 return CX_REG;
10074 return DI_REG;
10078 /* Return minimum incoming stack alignment. */
10080 static unsigned int
10081 ix86_minimum_incoming_stack_boundary (bool sibcall)
10083 unsigned int incoming_stack_boundary;
10085 /* Prefer the one specified at command line. */
10086 if (ix86_user_incoming_stack_boundary)
10087 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10088 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10089 if -mstackrealign is used, it isn't used for the sibcall check, and the
10090 estimated stack alignment is 128 bits. */
10091 else if (!sibcall
10092 && !TARGET_64BIT
10093 && ix86_force_align_arg_pointer
10094 && crtl->stack_alignment_estimated == 128)
10095 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10096 else
10097 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10099 /* Incoming stack alignment can be changed on individual functions
10100 via force_align_arg_pointer attribute. We use the smallest
10101 incoming stack boundary. */
10102 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10103 && lookup_attribute (ix86_force_align_arg_pointer_string,
10104 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10105 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10107 /* The incoming stack frame has to be aligned at least at
10108 parm_stack_boundary. */
10109 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10110 incoming_stack_boundary = crtl->parm_stack_boundary;
10112 /* The stack at the entrance of main is aligned by the runtime. We use the
10113 smallest incoming stack boundary. */
10114 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10115 && DECL_NAME (current_function_decl)
10116 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10117 && DECL_FILE_SCOPE_P (current_function_decl))
10118 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10120 return incoming_stack_boundary;
10123 /* Update incoming stack boundary and estimated stack alignment. */
10125 static void
10126 ix86_update_stack_boundary (void)
10128 ix86_incoming_stack_boundary
10129 = ix86_minimum_incoming_stack_boundary (false);
10131 /* x86_64 varargs need 16-byte stack alignment for the register save
10132 area. */
10133 if (TARGET_64BIT
10134 && cfun->stdarg
10135 && crtl->stack_alignment_estimated < 128)
10136 crtl->stack_alignment_estimated = 128;
10139 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10140 needed or an rtx for DRAP otherwise. */
10142 static rtx
10143 ix86_get_drap_rtx (void)
10145 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10146 crtl->need_drap = true;
10148 if (stack_realign_drap)
10150 /* Assign DRAP to vDRAP and return vDRAP. */
10151 unsigned int regno = find_drap_reg ();
10152 rtx drap_vreg;
10153 rtx arg_ptr;
10154 rtx seq, insn;
10156 arg_ptr = gen_rtx_REG (Pmode, regno);
10157 crtl->drap_reg = arg_ptr;
10159 start_sequence ();
10160 drap_vreg = copy_to_reg (arg_ptr);
10161 seq = get_insns ();
10162 end_sequence ();
10164 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10165 if (!optimize)
10167 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10168 RTX_FRAME_RELATED_P (insn) = 1;
10170 return drap_vreg;
10172 else
10173 return NULL;
10176 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10178 static rtx
10179 ix86_internal_arg_pointer (void)
10181 return virtual_incoming_args_rtx;
10184 struct scratch_reg {
10185 rtx reg;
10186 bool saved;
10189 /* Return a short-lived scratch register for use on function entry.
10190 In 32-bit mode, it is valid only after the registers are saved
10191 in the prologue. This register must be released by means of
10192 release_scratch_register_on_entry once it is dead. */
10194 static void
10195 get_scratch_register_on_entry (struct scratch_reg *sr)
10197 int regno;
10199 sr->saved = false;
10201 if (TARGET_64BIT)
10203 /* We always use R11 in 64-bit mode. */
10204 regno = R11_REG;
10206 else
10208 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10209 bool fastcall_p
10210 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10211 bool thiscall_p
10212 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10213 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10214 int regparm = ix86_function_regparm (fntype, decl);
10215 int drap_regno
10216 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10218 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10219 for the static chain register. */
10220 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10221 && drap_regno != AX_REG)
10222 regno = AX_REG;
10223 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10224 for the static chain register. */
10225 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10226 regno = AX_REG;
10227 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10228 regno = DX_REG;
10229 /* ecx is the static chain register. */
10230 else if (regparm < 3 && !fastcall_p && !thiscall_p
10231 && !static_chain_p
10232 && drap_regno != CX_REG)
10233 regno = CX_REG;
10234 else if (ix86_save_reg (BX_REG, true))
10235 regno = BX_REG;
10236 /* esi is the static chain register. */
10237 else if (!(regparm == 3 && static_chain_p)
10238 && ix86_save_reg (SI_REG, true))
10239 regno = SI_REG;
10240 else if (ix86_save_reg (DI_REG, true))
10241 regno = DI_REG;
10242 else
10244 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10245 sr->saved = true;
10249 sr->reg = gen_rtx_REG (Pmode, regno);
10250 if (sr->saved)
10252 rtx insn = emit_insn (gen_push (sr->reg));
10253 RTX_FRAME_RELATED_P (insn) = 1;
10257 /* Release a scratch register obtained from the preceding function. */
10259 static void
10260 release_scratch_register_on_entry (struct scratch_reg *sr)
10262 if (sr->saved)
10264 struct machine_function *m = cfun->machine;
10265 rtx x, insn = emit_insn (gen_pop (sr->reg));
10267 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10268 RTX_FRAME_RELATED_P (insn) = 1;
10269 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10270 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10271 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10272 m->fs.sp_offset -= UNITS_PER_WORD;
10276 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10278 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10280 static void
10281 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10283 /* We skip the probe for the first interval + a small dope of 4 words and
10284 probe that many bytes past the specified size to maintain a protection
10285 area at the bottom of the stack. */
10286 const int dope = 4 * UNITS_PER_WORD;
10287 rtx size_rtx = GEN_INT (size), last;
10289 /* See if we have a constant small number of probes to generate. If so,
10290 that's the easy case. The run-time loop is made up of 11 insns in the
10291 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10292 for n # of intervals. */
10293 if (size <= 5 * PROBE_INTERVAL)
10295 HOST_WIDE_INT i, adjust;
10296 bool first_probe = true;
10298 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10299 values of N from 1 until it exceeds SIZE. If only one probe is
10300 needed, this will not generate any code. Then adjust and probe
10301 to PROBE_INTERVAL + SIZE. */
10302 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10304 if (first_probe)
10306 adjust = 2 * PROBE_INTERVAL + dope;
10307 first_probe = false;
10309 else
10310 adjust = PROBE_INTERVAL;
10312 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10313 plus_constant (Pmode, stack_pointer_rtx,
10314 -adjust)));
10315 emit_stack_probe (stack_pointer_rtx);
10318 if (first_probe)
10319 adjust = size + PROBE_INTERVAL + dope;
10320 else
10321 adjust = size + PROBE_INTERVAL - i;
10323 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10324 plus_constant (Pmode, stack_pointer_rtx,
10325 -adjust)));
10326 emit_stack_probe (stack_pointer_rtx);
10328 /* Adjust back to account for the additional first interval. */
10329 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10330 plus_constant (Pmode, stack_pointer_rtx,
10331 PROBE_INTERVAL + dope)));
10334 /* Otherwise, do the same as above, but in a loop. Note that we must be
10335 extra careful with variables wrapping around because we might be at
10336 the very top (or the very bottom) of the address space and we have
10337 to be able to handle this case properly; in particular, we use an
10338 equality test for the loop condition. */
10339 else
10341 HOST_WIDE_INT rounded_size;
10342 struct scratch_reg sr;
10344 get_scratch_register_on_entry (&sr);
10347 /* Step 1: round SIZE to the previous multiple of the interval. */
10349 rounded_size = size & -PROBE_INTERVAL;
10352 /* Step 2: compute initial and final value of the loop counter. */
10354 /* SP = SP_0 + PROBE_INTERVAL. */
10355 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10356 plus_constant (Pmode, stack_pointer_rtx,
10357 - (PROBE_INTERVAL + dope))));
10359 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10360 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10361 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10362 gen_rtx_PLUS (Pmode, sr.reg,
10363 stack_pointer_rtx)));
10366 /* Step 3: the loop
10368 while (SP != LAST_ADDR)
10370 SP = SP + PROBE_INTERVAL
10371 probe at SP
10374 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10375 values of N from 1 until it is equal to ROUNDED_SIZE. */
10377 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10380 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10381 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10383 if (size != rounded_size)
10385 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10386 plus_constant (Pmode, stack_pointer_rtx,
10387 rounded_size - size)));
10388 emit_stack_probe (stack_pointer_rtx);
10391 /* Adjust back to account for the additional first interval. */
10392 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10393 plus_constant (Pmode, stack_pointer_rtx,
10394 PROBE_INTERVAL + dope)));
10396 release_scratch_register_on_entry (&sr);
10399 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10401 /* Even if the stack pointer isn't the CFA register, we need to correctly
10402 describe the adjustments made to it, in particular differentiate the
10403 frame-related ones from the frame-unrelated ones. */
10404 if (size > 0)
10406 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10407 XVECEXP (expr, 0, 0)
10408 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10409 plus_constant (Pmode, stack_pointer_rtx, -size));
10410 XVECEXP (expr, 0, 1)
10411 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10412 plus_constant (Pmode, stack_pointer_rtx,
10413 PROBE_INTERVAL + dope + size));
10414 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10415 RTX_FRAME_RELATED_P (last) = 1;
10417 cfun->machine->fs.sp_offset += size;
10420 /* Make sure nothing is scheduled before we are done. */
10421 emit_insn (gen_blockage ());
10424 /* Adjust the stack pointer up to REG while probing it. */
10426 const char *
10427 output_adjust_stack_and_probe (rtx reg)
10429 static int labelno = 0;
10430 char loop_lab[32], end_lab[32];
10431 rtx xops[2];
10433 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10434 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10436 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10438 /* Jump to END_LAB if SP == LAST_ADDR. */
10439 xops[0] = stack_pointer_rtx;
10440 xops[1] = reg;
10441 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10442 fputs ("\tje\t", asm_out_file);
10443 assemble_name_raw (asm_out_file, end_lab);
10444 fputc ('\n', asm_out_file);
10446 /* SP = SP + PROBE_INTERVAL. */
10447 xops[1] = GEN_INT (PROBE_INTERVAL);
10448 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10450 /* Probe at SP. */
10451 xops[1] = const0_rtx;
10452 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10454 fprintf (asm_out_file, "\tjmp\t");
10455 assemble_name_raw (asm_out_file, loop_lab);
10456 fputc ('\n', asm_out_file);
10458 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10460 return "";
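/* With the usual 4096-byte probe interval, the loop printed above comes out
   roughly as

   .LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
   .LPSRE0:

   on x86-64, touching each new page as the stack pointer walks down to the
   last address precomputed in the scratch register.  */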
10463 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10464 inclusive. These are offsets from the current stack pointer. */
10466 static void
10467 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10469 /* See if we have a constant small number of probes to generate. If so,
10470 that's the easy case. The run-time loop is made up of 7 insns in the
10471 generic case while the compile-time loop is made up of n insns for n #
10472 of intervals. */
10473 if (size <= 7 * PROBE_INTERVAL)
10475 HOST_WIDE_INT i;
10477 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10478 it exceeds SIZE. If only one probe is needed, this will not
10479 generate any code. Then probe at FIRST + SIZE. */
10480 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10481 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10482 -(first + i)));
10484 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10485 -(first + size)));
10488 /* Otherwise, do the same as above, but in a loop. Note that we must be
10489 extra careful with variables wrapping around because we might be at
10490 the very top (or the very bottom) of the address space and we have
10491 to be able to handle this case properly; in particular, we use an
10492 equality test for the loop condition. */
10493 else
10495 HOST_WIDE_INT rounded_size, last;
10496 struct scratch_reg sr;
10498 get_scratch_register_on_entry (&sr);
10501 /* Step 1: round SIZE to the previous multiple of the interval. */
10503 rounded_size = size & -PROBE_INTERVAL;
10506 /* Step 2: compute initial and final value of the loop counter. */
10508 /* TEST_OFFSET = FIRST. */
10509 emit_move_insn (sr.reg, GEN_INT (-first));
10511 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10512 last = first + rounded_size;
10515 /* Step 3: the loop
10517 while (TEST_ADDR != LAST_ADDR)
10519 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10520 probe at TEST_ADDR
10523 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10524 until it is equal to ROUNDED_SIZE. */
10526 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10529 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10530 that SIZE is equal to ROUNDED_SIZE. */
10532 if (size != rounded_size)
10533 emit_stack_probe (plus_constant (Pmode,
10534 gen_rtx_PLUS (Pmode,
10535 stack_pointer_rtx,
10536 sr.reg),
10537 rounded_size - size));
10539 release_scratch_register_on_entry (&sr);
10542 /* Make sure nothing is scheduled before we are done. */
10543 emit_insn (gen_blockage ());
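/* A worked example, purely for illustration: assume PROBE_INTERVAL is 4096,
   FIRST is 4096 and SIZE is 40000 (large enough to take the loop above).
   Then rounded_size = 40000 & -4096 = 36864, the loop probes at
   sp - 8192, sp - 12288, ..., sp - 40960, and since SIZE != rounded_size
   the final probe lands at sp - 40960 + (36864 - 40000) = sp - 44096,
   which is exactly sp - (FIRST + SIZE).  */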
10546 /* Probe a range of stack addresses from REG to END, inclusive. These are
10547 offsets from the current stack pointer. */
10549 const char *
10550 output_probe_stack_range (rtx reg, rtx end)
10552 static int labelno = 0;
10553 char loop_lab[32], end_lab[32];
10554 rtx xops[3];
10556 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10557 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10559 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10561 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10562 xops[0] = reg;
10563 xops[1] = end;
10564 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10565 fputs ("\tje\t", asm_out_file);
10566 assemble_name_raw (asm_out_file, end_lab);
10567 fputc ('\n', asm_out_file);
10569 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10570 xops[1] = GEN_INT (PROBE_INTERVAL);
10571 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10573 /* Probe at TEST_ADDR. */
10574 xops[0] = stack_pointer_rtx;
10575 xops[1] = reg;
10576 xops[2] = const0_rtx;
10577 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10579 fprintf (asm_out_file, "\tjmp\t");
10580 assemble_name_raw (asm_out_file, loop_lab);
10581 fputc ('\n', asm_out_file);
10583 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10585 return "";
10588 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10589 to be generated in correct form. */
10590 static void
10591 ix86_finalize_stack_realign_flags (void)
10593 /* Check if stack realignment is really needed after reload, and
10594 store the result in cfun. */
10595 unsigned int incoming_stack_boundary
10596 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10597 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10598 unsigned int stack_realign = (incoming_stack_boundary
10599 < (crtl->is_leaf
10600 ? crtl->max_used_stack_slot_alignment
10601 : crtl->stack_alignment_needed));
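/* For instance (illustration only): 32-bit code with a 32-bit incoming
   boundary whose locals require 128-bit alignment (say an SSE spill slot)
   ends up with stack_realign true, whereas a function whose most-aligned
   used slot fits within the incoming boundary does not.  */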
10603 if (crtl->stack_realign_finalized)
10605 /* After stack_realign_needed is finalized, we can no longer
10606 change it. */
10607 gcc_assert (crtl->stack_realign_needed == stack_realign);
10608 return;
10611 /* If the only reason for frame_pointer_needed is that we conservatively
10612 assumed stack realignment might be needed, but in the end nothing that
10613 needed the stack alignment had been spilled, clear frame_pointer_needed
10614 and say we don't need stack realignment. */
10615 if (stack_realign
10616 && frame_pointer_needed
10617 && crtl->is_leaf
10618 && flag_omit_frame_pointer
10619 && crtl->sp_is_unchanging
10620 && !ix86_current_function_calls_tls_descriptor
10621 && !crtl->accesses_prior_frames
10622 && !cfun->calls_alloca
10623 && !crtl->calls_eh_return
10624 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10625 && !ix86_frame_pointer_required ()
10626 && get_frame_size () == 0
10627 && ix86_nsaved_sseregs () == 0
10628 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10630 HARD_REG_SET set_up_by_prologue, prologue_used;
10631 basic_block bb;
10633 CLEAR_HARD_REG_SET (prologue_used);
10634 CLEAR_HARD_REG_SET (set_up_by_prologue);
10635 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10636 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10637 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10638 HARD_FRAME_POINTER_REGNUM);
10639 FOR_EACH_BB_FN (bb, cfun)
10641 rtx insn;
10642 FOR_BB_INSNS (bb, insn)
10643 if (NONDEBUG_INSN_P (insn)
10644 && requires_stack_frame_p (insn, prologue_used,
10645 set_up_by_prologue))
10647 crtl->stack_realign_needed = stack_realign;
10648 crtl->stack_realign_finalized = true;
10649 return;
10653 /* If drap has been set, but it actually isn't live at the start
10654 of the function, there is no reason to set it up. */
10655 if (crtl->drap_reg)
10657 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10658 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10660 crtl->drap_reg = NULL_RTX;
10661 crtl->need_drap = false;
10664 else
10665 cfun->machine->no_drap_save_restore = true;
10667 frame_pointer_needed = false;
10668 stack_realign = false;
10669 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10670 crtl->stack_alignment_needed = incoming_stack_boundary;
10671 crtl->stack_alignment_estimated = incoming_stack_boundary;
10672 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10673 crtl->preferred_stack_boundary = incoming_stack_boundary;
10674 df_finish_pass (true);
10675 df_scan_alloc (NULL);
10676 df_scan_blocks ();
10677 df_compute_regs_ever_live (true);
10678 df_analyze ();
10681 crtl->stack_realign_needed = stack_realign;
10682 crtl->stack_realign_finalized = true;
10685 /* Expand the prologue into a bunch of separate insns. */
10687 void
10688 ix86_expand_prologue (void)
10690 struct machine_function *m = cfun->machine;
10691 rtx insn, t;
10692 bool pic_reg_used;
10693 struct ix86_frame frame;
10694 HOST_WIDE_INT allocate;
10695 bool int_registers_saved;
10696 bool sse_registers_saved;
10698 ix86_finalize_stack_realign_flags ();
10700 /* DRAP should not coexist with stack_realign_fp */
10701 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10703 memset (&m->fs, 0, sizeof (m->fs));
10705 /* Initialize CFA state for before the prologue. */
10706 m->fs.cfa_reg = stack_pointer_rtx;
10707 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10709 /* Track SP offset to the CFA. We continue tracking this after we've
10710 swapped the CFA register away from SP. In the case of re-alignment
10711 this is fudged; we're interested in offsets within the local frame. */
10712 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10713 m->fs.sp_valid = true;
10715 ix86_compute_frame_layout (&frame);
10717 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10719 /* We should have already generated an error for any use of
10720 ms_hook on a nested function. */
10721 gcc_checking_assert (!ix86_static_chain_on_stack);
10723 /* Check if profiling is active and we shall use profiling before
10724 prologue variant. If so sorry. */
10725 if (crtl->profile && flag_fentry != 0)
10726 sorry ("ms_hook_prologue attribute isn%'t compatible "
10727 "with -mfentry for 32-bit");
10729 /* In ix86_asm_output_function_label we emitted:
10730 8b ff movl.s %edi,%edi
10731 55 push %ebp
10732 8b ec movl.s %esp,%ebp
10734 This matches the hookable function prologue in Win32 API
10735 functions in Microsoft Windows XP Service Pack 2 and newer.
10736 Wine uses this to enable Windows apps to hook the Win32 API
10737 functions provided by Wine.
10739 What that means is that we've already set up the frame pointer. */
10741 if (frame_pointer_needed
10742 && !(crtl->drap_reg && crtl->stack_realign_needed))
10744 rtx push, mov;
10746 /* We've decided to use the frame pointer already set up.
10747 Describe this to the unwinder by pretending that both
10748 push and mov insns happen right here.
10750 Putting the unwind info here at the end of the ms_hook
10751 is done so that we can make absolutely certain we get
10752 the required byte sequence at the start of the function,
10753 rather than relying on an assembler that can produce
10754 the exact encoding required.
10756 However it does mean (in the unpatched case) that we have
10757 a 1 insn window where the asynchronous unwind info is
10758 incorrect. However, if we placed the unwind info at
10759 its correct location we would have incorrect unwind info
10760 in the patched case. Which is probably all moot since
10761 I don't expect Wine generates dwarf2 unwind info for the
10762 system libraries that use this feature. */
10764 insn = emit_insn (gen_blockage ());
10766 push = gen_push (hard_frame_pointer_rtx);
10767 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10768 stack_pointer_rtx);
10769 RTX_FRAME_RELATED_P (push) = 1;
10770 RTX_FRAME_RELATED_P (mov) = 1;
10772 RTX_FRAME_RELATED_P (insn) = 1;
10773 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10774 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10776 /* Note that gen_push incremented m->fs.cfa_offset, even
10777 though we didn't emit the push insn here. */
10778 m->fs.cfa_reg = hard_frame_pointer_rtx;
10779 m->fs.fp_offset = m->fs.cfa_offset;
10780 m->fs.fp_valid = true;
10782 else
10784 /* The frame pointer is not needed so pop %ebp again.
10785 This leaves us with a pristine state. */
10786 emit_insn (gen_pop (hard_frame_pointer_rtx));
10790 /* The first insn of a function that accepts its static chain on the
10791 stack is to push the register that would be filled in by a direct
10792 call. This insn will be skipped by the trampoline. */
10793 else if (ix86_static_chain_on_stack)
10795 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10796 emit_insn (gen_blockage ());
10798 /* We don't want to interpret this push insn as a register save,
10799 only as a stack adjustment. The real copy of the register as
10800 a save will be done later, if needed. */
10801 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10802 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10803 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10804 RTX_FRAME_RELATED_P (insn) = 1;
10807 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10808 DRAP is needed and stack realignment is really needed after reload. */
10809 if (stack_realign_drap)
10811 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10813 /* Only need to push parameter pointer reg if it is caller saved. */
10814 if (!call_used_regs[REGNO (crtl->drap_reg)])
10816 /* Push arg pointer reg */
10817 insn = emit_insn (gen_push (crtl->drap_reg));
10818 RTX_FRAME_RELATED_P (insn) = 1;
10821 /* Grab the argument pointer. */
10822 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10823 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10824 RTX_FRAME_RELATED_P (insn) = 1;
10825 m->fs.cfa_reg = crtl->drap_reg;
10826 m->fs.cfa_offset = 0;
10828 /* Align the stack. */
10829 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10830 stack_pointer_rtx,
10831 GEN_INT (-align_bytes)));
10832 RTX_FRAME_RELATED_P (insn) = 1;
10834 /* Replicate the return address on the stack so that return
10835 address can be reached via (argp - 1) slot. This is needed
10836 to implement macro RETURN_ADDR_RTX and intrinsic function
10837 expand_builtin_return_addr etc. */
10838 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10839 t = gen_frame_mem (word_mode, t);
10840 insn = emit_insn (gen_push (t));
10841 RTX_FRAME_RELATED_P (insn) = 1;
10843 /* For the purposes of frame and register save area addressing,
10844 we've started over with a new frame. */
10845 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10846 m->fs.realigned = true;
10849 int_registers_saved = (frame.nregs == 0);
10850 sse_registers_saved = (frame.nsseregs == 0);
10852 if (frame_pointer_needed && !m->fs.fp_valid)
10854 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10855 slower on all targets. Also sdb doesn't like it. */
10856 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10857 RTX_FRAME_RELATED_P (insn) = 1;
10859 /* Push registers now, before setting the frame pointer
10860 on SEH target. */
10861 if (!int_registers_saved
10862 && TARGET_SEH
10863 && !frame.save_regs_using_mov)
10865 ix86_emit_save_regs ();
10866 int_registers_saved = true;
10867 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10870 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10872 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10873 RTX_FRAME_RELATED_P (insn) = 1;
10875 if (m->fs.cfa_reg == stack_pointer_rtx)
10876 m->fs.cfa_reg = hard_frame_pointer_rtx;
10877 m->fs.fp_offset = m->fs.sp_offset;
10878 m->fs.fp_valid = true;
10882 if (!int_registers_saved)
10884 /* If saving registers via PUSH, do so now. */
10885 if (!frame.save_regs_using_mov)
10887 ix86_emit_save_regs ();
10888 int_registers_saved = true;
10889 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10892 /* When using red zone we may start register saving before allocating
10893 the stack frame saving one cycle of the prologue. However, avoid
10894 doing this if we have to probe the stack; at least on x86_64 the
10895 stack probe can turn into a call that clobbers a red zone location. */
10896 else if (ix86_using_red_zone ()
10897 && (! TARGET_STACK_PROBE
10898 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10900 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10901 int_registers_saved = true;
10905 if (stack_realign_fp)
10907 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10908 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10910 /* The computation of the size of the re-aligned stack frame means
10911 that we must allocate the size of the register save area before
10912 performing the actual alignment. Otherwise we cannot guarantee
10913 that there's enough storage above the realignment point. */
10914 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10915 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10916 GEN_INT (m->fs.sp_offset
10917 - frame.sse_reg_save_offset),
10918 -1, false);
10920 /* Align the stack. */
10921 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10922 stack_pointer_rtx,
10923 GEN_INT (-align_bytes)));
10925 /* For the purposes of register save area addressing, the stack
10926 pointer is no longer valid. As for the value of sp_offset,
10927 see ix86_compute_frame_layout, which we need to match in order
10928 to pass verification of stack_pointer_offset at the end. */
10929 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10930 m->fs.sp_valid = false;
10933 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10935 if (flag_stack_usage_info)
10937 /* We start to count from ARG_POINTER. */
10938 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10940 /* If it was realigned, take into account the fake frame. */
10941 if (stack_realign_drap)
10943 if (ix86_static_chain_on_stack)
10944 stack_size += UNITS_PER_WORD;
10946 if (!call_used_regs[REGNO (crtl->drap_reg)])
10947 stack_size += UNITS_PER_WORD;
10949 /* This over-estimates by 1 minimal-stack-alignment-unit but
10950 mitigates that by counting in the new return address slot. */
10951 current_function_dynamic_stack_size
10952 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10955 current_function_static_stack_size = stack_size;
10958 /* On SEH target with very large frame size, allocate an area to save
10959 SSE registers (as the very large allocation won't be described). */
10960 if (TARGET_SEH
10961 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10962 && !sse_registers_saved)
10964 HOST_WIDE_INT sse_size =
10965 frame.sse_reg_save_offset - frame.reg_save_offset;
10967 gcc_assert (int_registers_saved);
10969 /* No need to do stack checking as the area will be immediately
10970 written. */
10971 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10972 GEN_INT (-sse_size), -1,
10973 m->fs.cfa_reg == stack_pointer_rtx);
10974 allocate -= sse_size;
10975 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10976 sse_registers_saved = true;
10979 /* The stack has already been decremented by the instruction calling us
10980 so probe if the size is non-negative to preserve the protection area. */
10981 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10983 /* We expect the registers to be saved when probes are used. */
10984 gcc_assert (int_registers_saved);
10986 if (STACK_CHECK_MOVING_SP)
10988 if (!(crtl->is_leaf && !cfun->calls_alloca
10989 && allocate <= PROBE_INTERVAL))
10991 ix86_adjust_stack_and_probe (allocate);
10992 allocate = 0;
10995 else
10997 HOST_WIDE_INT size = allocate;
10999 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11000 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11002 if (TARGET_STACK_PROBE)
11004 if (crtl->is_leaf && !cfun->calls_alloca)
11006 if (size > PROBE_INTERVAL)
11007 ix86_emit_probe_stack_range (0, size);
11009 else
11010 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11012 else
11014 if (crtl->is_leaf && !cfun->calls_alloca)
11016 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11017 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11018 size - STACK_CHECK_PROTECT);
11020 else
11021 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11026 if (allocate == 0)
11028 else if (!ix86_target_stack_probe ()
11029 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11031 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11032 GEN_INT (-allocate), -1,
11033 m->fs.cfa_reg == stack_pointer_rtx);
11035 else
11037 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11038 rtx r10 = NULL;
11039 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11040 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11041 bool eax_live = ix86_eax_live_at_start_p ();
11042 bool r10_live = false;
11044 if (TARGET_64BIT)
11045 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11047 if (eax_live)
11049 insn = emit_insn (gen_push (eax));
11050 allocate -= UNITS_PER_WORD;
11051 /* Note that SEH directives need to continue tracking the stack
11052 pointer even after the frame pointer has been set up. */
11053 if (sp_is_cfa_reg || TARGET_SEH)
11055 if (sp_is_cfa_reg)
11056 m->fs.cfa_offset += UNITS_PER_WORD;
11057 RTX_FRAME_RELATED_P (insn) = 1;
11061 if (r10_live)
11063 r10 = gen_rtx_REG (Pmode, R10_REG);
11064 insn = emit_insn (gen_push (r10));
11065 allocate -= UNITS_PER_WORD;
11066 if (sp_is_cfa_reg || TARGET_SEH)
11068 if (sp_is_cfa_reg)
11069 m->fs.cfa_offset += UNITS_PER_WORD;
11070 RTX_FRAME_RELATED_P (insn) = 1;
11074 emit_move_insn (eax, GEN_INT (allocate));
11075 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11077 /* Use the fact that AX still contains ALLOCATE. */
11078 adjust_stack_insn = (Pmode == DImode
11079 ? gen_pro_epilogue_adjust_stack_di_sub
11080 : gen_pro_epilogue_adjust_stack_si_sub);
11082 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11083 stack_pointer_rtx, eax));
11085 if (sp_is_cfa_reg || TARGET_SEH)
11087 if (sp_is_cfa_reg)
11088 m->fs.cfa_offset += allocate;
11089 RTX_FRAME_RELATED_P (insn) = 1;
11090 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11091 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11092 plus_constant (Pmode, stack_pointer_rtx,
11093 -allocate)));
11095 m->fs.sp_offset += allocate;
11097 /* Use stack_pointer_rtx for relative addressing so that code
11098 works for realigned stack, too. */
11099 if (r10_live && eax_live)
11101 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11102 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11103 gen_frame_mem (word_mode, t));
11104 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11105 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11106 gen_frame_mem (word_mode, t));
11108 else if (eax_live || r10_live)
11110 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11111 emit_move_insn (gen_rtx_REG (word_mode,
11112 (eax_live ? AX_REG : R10_REG)),
11113 gen_frame_mem (word_mode, t));
11116 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11118 /* If we haven't already set up the frame pointer, do so now. */
11119 if (frame_pointer_needed && !m->fs.fp_valid)
11121 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11122 GEN_INT (frame.stack_pointer_offset
11123 - frame.hard_frame_pointer_offset));
11124 insn = emit_insn (insn);
11125 RTX_FRAME_RELATED_P (insn) = 1;
11126 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11128 if (m->fs.cfa_reg == stack_pointer_rtx)
11129 m->fs.cfa_reg = hard_frame_pointer_rtx;
11130 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11131 m->fs.fp_valid = true;
11134 if (!int_registers_saved)
11135 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11136 if (!sse_registers_saved)
11137 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11139 pic_reg_used = false;
11140 /* We don't use pic-register for pe-coff target. */
11141 if (pic_offset_table_rtx
11142 && !TARGET_PECOFF
11143 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11144 || crtl->profile))
11146 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11148 if (alt_pic_reg_used != INVALID_REGNUM)
11149 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11151 pic_reg_used = true;
11154 if (pic_reg_used)
11156 if (TARGET_64BIT)
11158 if (ix86_cmodel == CM_LARGE_PIC)
11160 rtx label, tmp_reg;
11162 gcc_assert (Pmode == DImode);
11163 label = gen_label_rtx ();
11164 emit_label (label);
11165 LABEL_PRESERVE_P (label) = 1;
11166 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11167 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11168 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11169 label));
11170 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11171 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11172 pic_offset_table_rtx, tmp_reg));
11174 else
11175 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11177 else
11179 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11180 RTX_FRAME_RELATED_P (insn) = 1;
11181 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11185 /* In the pic_reg_used case, make sure that the got load isn't deleted
11186 when mcount needs it. Blockage to avoid call movement across mcount
11187 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11188 note. */
11189 if (crtl->profile && !flag_fentry && pic_reg_used)
11190 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11192 if (crtl->drap_reg && !crtl->stack_realign_needed)
11194 /* vDRAP is set up, but after reload it turns out stack realignment
11195 isn't necessary; here we emit prologue code to set up DRAP
11196 without the stack realignment adjustment. */
11197 t = choose_baseaddr (0);
11198 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11201 /* Prevent instructions from being scheduled into register save push
11202 sequence when access to the redzone area is done through frame pointer.
11203 The offset between the frame pointer and the stack pointer is calculated
11204 relative to the value of the stack pointer at the end of the function
11205 prologue, and moving instructions that access redzone area via frame
11206 pointer inside push sequence violates this assumption. */
11207 if (frame_pointer_needed && frame.red_zone_size)
11208 emit_insn (gen_memory_blockage ());
11210 /* Emit cld instruction if stringops are used in the function. */
11211 if (TARGET_CLD && ix86_current_function_needs_cld)
11212 emit_insn (gen_cld ());
11214 /* SEH requires that the prologue end within 256 bytes of the start of
11215 the function. Prevent instruction schedules that would extend that.
11216 Further, prevent alloca modifications to the stack pointer from being
11217 combined with prologue modifications. */
11218 if (TARGET_SEH)
11219 emit_insn (gen_prologue_use (stack_pointer_rtx));
11222 /* Emit code to restore REG using a POP insn. */
11224 static void
11225 ix86_emit_restore_reg_using_pop (rtx reg)
11227 struct machine_function *m = cfun->machine;
11228 rtx insn = emit_insn (gen_pop (reg));
11230 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11231 m->fs.sp_offset -= UNITS_PER_WORD;
11233 if (m->fs.cfa_reg == crtl->drap_reg
11234 && REGNO (reg) == REGNO (crtl->drap_reg))
11236 /* Previously we'd represented the CFA as an expression
11237 like *(%ebp - 8). We've just popped that value from
11238 the stack, which means we need to reset the CFA to
11239 the drap register. This will remain until we restore
11240 the stack pointer. */
11241 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11242 RTX_FRAME_RELATED_P (insn) = 1;
11244 /* This means that the DRAP register is valid for addressing too. */
11245 m->fs.drap_valid = true;
11246 return;
11249 if (m->fs.cfa_reg == stack_pointer_rtx)
11251 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11252 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11253 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11254 RTX_FRAME_RELATED_P (insn) = 1;
11256 m->fs.cfa_offset -= UNITS_PER_WORD;
11259 /* When the frame pointer is the CFA, and we pop it, we are
11260 swapping back to the stack pointer as the CFA. This happens
11261 for stack frames that don't allocate other data, so we assume
11262 the stack pointer is now pointing at the return address, i.e.
11263 the function entry state, which makes the offset be 1 word. */
11264 if (reg == hard_frame_pointer_rtx)
11266 m->fs.fp_valid = false;
11267 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11269 m->fs.cfa_reg = stack_pointer_rtx;
11270 m->fs.cfa_offset -= UNITS_PER_WORD;
11272 add_reg_note (insn, REG_CFA_DEF_CFA,
11273 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11274 GEN_INT (m->fs.cfa_offset)));
11275 RTX_FRAME_RELATED_P (insn) = 1;
11280 /* Emit code to restore saved registers using POP insns. */
11282 static void
11283 ix86_emit_restore_regs_using_pop (void)
11285 unsigned int regno;
11287 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11288 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11289 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11292 /* Emit code and notes for the LEAVE instruction. */
11294 static void
11295 ix86_emit_leave (void)
11297 struct machine_function *m = cfun->machine;
11298 rtx insn = emit_insn (ix86_gen_leave ());
11300 ix86_add_queued_cfa_restore_notes (insn);
11302 gcc_assert (m->fs.fp_valid);
11303 m->fs.sp_valid = true;
11304 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11305 m->fs.fp_valid = false;
11307 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11309 m->fs.cfa_reg = stack_pointer_rtx;
11310 m->fs.cfa_offset = m->fs.sp_offset;
11312 add_reg_note (insn, REG_CFA_DEF_CFA,
11313 plus_constant (Pmode, stack_pointer_rtx,
11314 m->fs.sp_offset));
11315 RTX_FRAME_RELATED_P (insn) = 1;
11317 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11318 m->fs.fp_offset);
11321 /* Emit code to restore saved registers using MOV insns.
11322 First register is restored from CFA - CFA_OFFSET. */
11323 static void
11324 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11325 bool maybe_eh_return)
11327 struct machine_function *m = cfun->machine;
11328 unsigned int regno;
11330 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11331 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11333 rtx reg = gen_rtx_REG (word_mode, regno);
11334 rtx insn, mem;
11336 mem = choose_baseaddr (cfa_offset);
11337 mem = gen_frame_mem (word_mode, mem);
11338 insn = emit_move_insn (reg, mem);
11340 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11342 /* Previously we'd represented the CFA as an expression
11343 like *(%ebp - 8). We've just popped that value from
11344 the stack, which means we need to reset the CFA to
11345 the drap register. This will remain until we restore
11346 the stack pointer. */
11347 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11348 RTX_FRAME_RELATED_P (insn) = 1;
11350 /* This means that the DRAP register is valid for addressing. */
11351 m->fs.drap_valid = true;
11353 else
11354 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11356 cfa_offset -= UNITS_PER_WORD;
11360 /* Emit code to restore saved SSE registers using MOV insns.
11361 First register is restored from CFA - CFA_OFFSET. */
11362 static void
11363 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11364 bool maybe_eh_return)
11366 unsigned int regno;
11368 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11369 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11371 rtx reg = gen_rtx_REG (V4SFmode, regno);
11372 rtx mem;
11374 mem = choose_baseaddr (cfa_offset);
11375 mem = gen_rtx_MEM (V4SFmode, mem);
11376 set_mem_align (mem, 128);
11377 emit_move_insn (reg, mem);
11379 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11381 cfa_offset -= 16;
11385 /* Restore function stack, frame, and registers. */
11387 void
11388 ix86_expand_epilogue (int style)
11390 struct machine_function *m = cfun->machine;
11391 struct machine_frame_state frame_state_save = m->fs;
11392 struct ix86_frame frame;
11393 bool restore_regs_via_mov;
11394 bool using_drap;
11396 ix86_finalize_stack_realign_flags ();
11397 ix86_compute_frame_layout (&frame);
11399 m->fs.sp_valid = (!frame_pointer_needed
11400 || (crtl->sp_is_unchanging
11401 && !stack_realign_fp));
11402 gcc_assert (!m->fs.sp_valid
11403 || m->fs.sp_offset == frame.stack_pointer_offset);
11405 /* The FP must be valid if the frame pointer is present. */
11406 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11407 gcc_assert (!m->fs.fp_valid
11408 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11410 /* We must have *some* valid pointer to the stack frame. */
11411 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11413 /* The DRAP is never valid at this point. */
11414 gcc_assert (!m->fs.drap_valid);
11416 /* See the comment about red zone and frame
11417 pointer usage in ix86_expand_prologue. */
11418 if (frame_pointer_needed && frame.red_zone_size)
11419 emit_insn (gen_memory_blockage ());
11421 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11422 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11424 /* Determine the CFA offset of the end of the red-zone. */
11425 m->fs.red_zone_offset = 0;
11426 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11428 /* The red-zone begins below the return address. */
11429 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11431 /* When the register save area is in the aligned portion of
11432 the stack, determine the maximum runtime displacement that
11433 matches up with the aligned frame. */
11434 if (stack_realign_drap)
11435 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11436 + UNITS_PER_WORD);
11439 /* Special care must be taken for the normal return case of a function
11440 using eh_return: the eax and edx registers are marked as saved, but
11441 not restored along this path. Adjust the save location to match. */
11442 if (crtl->calls_eh_return && style != 2)
11443 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11445 /* EH_RETURN requires the use of moves to function properly. */
11446 if (crtl->calls_eh_return)
11447 restore_regs_via_mov = true;
11448 /* SEH requires the use of pops to identify the epilogue. */
11449 else if (TARGET_SEH)
11450 restore_regs_via_mov = false;
11451 /* If we're only restoring one register and sp is not valid, then
11452 use a move instruction to restore the register, since it's
11453 less work than reloading sp and popping the register. */
11454 else if (!m->fs.sp_valid && frame.nregs <= 1)
11455 restore_regs_via_mov = true;
11456 else if (TARGET_EPILOGUE_USING_MOVE
11457 && cfun->machine->use_fast_prologue_epilogue
11458 && (frame.nregs > 1
11459 || m->fs.sp_offset != frame.reg_save_offset))
11460 restore_regs_via_mov = true;
11461 else if (frame_pointer_needed
11462 && !frame.nregs
11463 && m->fs.sp_offset != frame.reg_save_offset)
11464 restore_regs_via_mov = true;
11465 else if (frame_pointer_needed
11466 && TARGET_USE_LEAVE
11467 && cfun->machine->use_fast_prologue_epilogue
11468 && frame.nregs == 1)
11469 restore_regs_via_mov = true;
11470 else
11471 restore_regs_via_mov = false;
11473 if (restore_regs_via_mov || frame.nsseregs)
11475 /* Ensure that the entire register save area is addressable via
11476 the stack pointer, if we will restore via sp. */
11477 if (TARGET_64BIT
11478 && m->fs.sp_offset > 0x7fffffff
11479 && !(m->fs.fp_valid || m->fs.drap_valid)
11480 && (frame.nsseregs + frame.nregs) != 0)
11482 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11483 GEN_INT (m->fs.sp_offset
11484 - frame.sse_reg_save_offset),
11485 style,
11486 m->fs.cfa_reg == stack_pointer_rtx);
11490 /* If there are any SSE registers to restore, then we have to do it
11491 via moves, since there's obviously no pop for SSE regs. */
11492 if (frame.nsseregs)
11493 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11494 style == 2);
11496 if (restore_regs_via_mov)
11498 rtx t;
11500 if (frame.nregs)
11501 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11503 /* eh_return epilogues need %ecx added to the stack pointer. */
11504 if (style == 2)
11506 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11508 /* Stack align doesn't work with eh_return. */
11509 gcc_assert (!stack_realign_drap);
11511 /* Neither do regparm nested functions. */
11511 gcc_assert (!ix86_static_chain_on_stack);
11513 if (frame_pointer_needed)
11515 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11516 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11517 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11519 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11520 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11522 /* Note that we use SA as a temporary CFA, as the return
11523 address is at the proper place relative to it. We
11524 pretend this happens at the FP restore insn because
11525 prior to this insn the FP would be stored at the wrong
11526 offset relative to SA, and after this insn we have no
11527 other reasonable register to use for the CFA. We don't
11528 bother resetting the CFA to the SP for the duration of
11529 the return insn. */
11530 add_reg_note (insn, REG_CFA_DEF_CFA,
11531 plus_constant (Pmode, sa, UNITS_PER_WORD));
11532 ix86_add_queued_cfa_restore_notes (insn);
11533 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11534 RTX_FRAME_RELATED_P (insn) = 1;
11536 m->fs.cfa_reg = sa;
11537 m->fs.cfa_offset = UNITS_PER_WORD;
11538 m->fs.fp_valid = false;
11540 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11541 const0_rtx, style, false);
11543 else
11545 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11546 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11547 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11548 ix86_add_queued_cfa_restore_notes (insn);
11550 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11551 if (m->fs.cfa_offset != UNITS_PER_WORD)
11553 m->fs.cfa_offset = UNITS_PER_WORD;
11554 add_reg_note (insn, REG_CFA_DEF_CFA,
11555 plus_constant (Pmode, stack_pointer_rtx,
11556 UNITS_PER_WORD));
11557 RTX_FRAME_RELATED_P (insn) = 1;
11560 m->fs.sp_offset = UNITS_PER_WORD;
11561 m->fs.sp_valid = true;
11564 else
11566 /* SEH requires that the function end with (1) a stack adjustment
11567 if necessary, (2) a sequence of pops, and (3) a return or
11568 jump instruction. Prevent insns from the function body from
11569 being scheduled into this sequence. */
11570 if (TARGET_SEH)
11572 /* Prevent a catch region from being adjacent to the standard
11573 epilogue sequence. Unfortunately, crtl->uses_eh_lsda and
11574 several other flags that would be interesting to test are
11575 not yet set up. */
11576 if (flag_non_call_exceptions)
11577 emit_insn (gen_nops (const1_rtx));
11578 else
11579 emit_insn (gen_blockage ());
11582 /* First step is to deallocate the stack frame so that we can
11583 pop the registers. Also do it on SEH target for very large
11584 frame as the emitted instructions aren't allowed by the ABI in
11585 epilogues. */
11586 if (!m->fs.sp_valid
11587 || (TARGET_SEH
11588 && (m->fs.sp_offset - frame.reg_save_offset
11589 >= SEH_MAX_FRAME_SIZE)))
11591 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11592 GEN_INT (m->fs.fp_offset
11593 - frame.reg_save_offset),
11594 style, false);
11596 else if (m->fs.sp_offset != frame.reg_save_offset)
11598 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11599 GEN_INT (m->fs.sp_offset
11600 - frame.reg_save_offset),
11601 style,
11602 m->fs.cfa_reg == stack_pointer_rtx);
11605 ix86_emit_restore_regs_using_pop ();
11608 /* If we used a frame pointer and haven't already got rid of it,
11609 then do so now. */
11610 if (m->fs.fp_valid)
11612 /* If the stack pointer is valid and pointing at the frame
11613 pointer store address, then we only need a pop. */
11614 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11615 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11616 /* Leave results in shorter dependency chains on CPUs that are
11617 able to grok it fast. */
11618 else if (TARGET_USE_LEAVE
11619 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11620 || !cfun->machine->use_fast_prologue_epilogue)
11621 ix86_emit_leave ();
11622 else
11624 pro_epilogue_adjust_stack (stack_pointer_rtx,
11625 hard_frame_pointer_rtx,
11626 const0_rtx, style, !using_drap);
11627 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11631 if (using_drap)
11633 int param_ptr_offset = UNITS_PER_WORD;
11634 rtx insn;
11636 gcc_assert (stack_realign_drap);
11638 if (ix86_static_chain_on_stack)
11639 param_ptr_offset += UNITS_PER_WORD;
11640 if (!call_used_regs[REGNO (crtl->drap_reg)])
11641 param_ptr_offset += UNITS_PER_WORD;
11643 insn = emit_insn (gen_rtx_SET
11644 (VOIDmode, stack_pointer_rtx,
11645 gen_rtx_PLUS (Pmode,
11646 crtl->drap_reg,
11647 GEN_INT (-param_ptr_offset))));
11648 m->fs.cfa_reg = stack_pointer_rtx;
11649 m->fs.cfa_offset = param_ptr_offset;
11650 m->fs.sp_offset = param_ptr_offset;
11651 m->fs.realigned = false;
11653 add_reg_note (insn, REG_CFA_DEF_CFA,
11654 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11655 GEN_INT (param_ptr_offset)));
11656 RTX_FRAME_RELATED_P (insn) = 1;
11658 if (!call_used_regs[REGNO (crtl->drap_reg)])
11659 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11662 /* At this point the stack pointer must be valid, and we must have
11663 restored all of the registers. We may not have deallocated the
11664 entire stack frame. We've delayed this until now because it may
11665 be possible to merge the local stack deallocation with the
11666 deallocation forced by ix86_static_chain_on_stack. */
11667 gcc_assert (m->fs.sp_valid);
11668 gcc_assert (!m->fs.fp_valid);
11669 gcc_assert (!m->fs.realigned);
11670 if (m->fs.sp_offset != UNITS_PER_WORD)
11672 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11673 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11674 style, true);
11676 else
11677 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11679 /* Sibcall epilogues don't want a return instruction. */
11680 if (style == 0)
11682 m->fs = frame_state_save;
11683 return;
11686 if (crtl->args.pops_args && crtl->args.size)
11688 rtx popc = GEN_INT (crtl->args.pops_args);
11690 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11691 address, do explicit add, and jump indirectly to the caller. */
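/* Roughly (illustration only), when more than 64K must be popped the
   emitted 32-bit tail looks like:

	popl	%ecx		# fetch the return address
	addl	$N, %esp	# N == crtl->args.pops_args
	jmp	*%ecx		# return to the caller

   instead of the usual "ret $N".  */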
11693 if (crtl->args.pops_args >= 65536)
11695 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11696 rtx insn;
11698 /* There is no "pascal" calling convention in any 64bit ABI. */
11699 gcc_assert (!TARGET_64BIT);
11701 insn = emit_insn (gen_pop (ecx));
11702 m->fs.cfa_offset -= UNITS_PER_WORD;
11703 m->fs.sp_offset -= UNITS_PER_WORD;
11705 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11706 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11707 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11708 add_reg_note (insn, REG_CFA_REGISTER,
11709 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11710 RTX_FRAME_RELATED_P (insn) = 1;
11712 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11713 popc, -1, true);
11714 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11716 else
11717 emit_jump_insn (gen_simple_return_pop_internal (popc));
11719 else
11720 emit_jump_insn (gen_simple_return_internal ());
11722 /* Restore the state back to the state from the prologue,
11723 so that it's correct for the next epilogue. */
11724 m->fs = frame_state_save;
11727 /* Reset from the function's potential modifications. */
11729 static void
11730 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11731 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11733 if (pic_offset_table_rtx)
11734 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11735 #if TARGET_MACHO
11736 /* Mach-O doesn't support labels at the end of objects, so if
11737 it looks like we might want one, insert a NOP. */
11739 rtx insn = get_last_insn ();
11740 rtx deleted_debug_label = NULL_RTX;
11741 while (insn
11742 && NOTE_P (insn)
11743 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11745 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11746 notes only, instead set their CODE_LABEL_NUMBER to -1,
11747 otherwise there would be code generation differences
11748 in between -g and -g0. */
11749 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11750 deleted_debug_label = insn;
11751 insn = PREV_INSN (insn);
11753 if (insn
11754 && (LABEL_P (insn)
11755 || (NOTE_P (insn)
11756 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11757 fputs ("\tnop\n", file);
11758 else if (deleted_debug_label)
11759 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11760 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11761 CODE_LABEL_NUMBER (insn) = -1;
11763 #endif
11767 /* Return a scratch register to use in the split stack prologue. The
11768 split stack prologue is used for -fsplit-stack. It is the first
11769 instructions in the function, even before the regular prologue.
11770 The scratch register can be any caller-saved register which is not
11771 used for parameters or for the static chain. */
11773 static unsigned int
11774 split_stack_prologue_scratch_regno (void)
11776 if (TARGET_64BIT)
11777 return R11_REG;
11778 else
11780 bool is_fastcall, is_thiscall;
11781 int regparm;
11783 is_fastcall = (lookup_attribute ("fastcall",
11784 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11785 != NULL);
11786 is_thiscall = (lookup_attribute ("thiscall",
11787 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11788 != NULL);
11789 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11791 if (is_fastcall)
11793 if (DECL_STATIC_CHAIN (cfun->decl))
11795 sorry ("-fsplit-stack does not support fastcall with "
11796 "nested function");
11797 return INVALID_REGNUM;
11799 return AX_REG;
11801 else if (is_thiscall)
11803 if (!DECL_STATIC_CHAIN (cfun->decl))
11804 return DX_REG;
11805 return AX_REG;
11807 else if (regparm < 3)
11809 if (!DECL_STATIC_CHAIN (cfun->decl))
11810 return CX_REG;
11811 else
11813 if (regparm >= 2)
11815 sorry ("-fsplit-stack does not support 2 register "
11816 " parameters for a nested function");
11817 return INVALID_REGNUM;
11819 return DX_REG;
11822 else
11824 /* FIXME: We could make this work by pushing a register
11825 around the addition and comparison. */
11826 sorry ("-fsplit-stack does not support 3 register parameters");
11827 return INVALID_REGNUM;
11832 /* A SYMBOL_REF for the function which allocates new stackspace for
11833 -fsplit-stack. */
11835 static GTY(()) rtx split_stack_fn;
11837 /* A SYMBOL_REF for the more stack function when using the large
11838 model. */
11840 static GTY(()) rtx split_stack_fn_large;
11842 /* Handle -fsplit-stack. These are the first instructions in the
11843 function, even before the regular prologue. */
11845 void
11846 ix86_expand_split_stack_prologue (void)
11848 struct ix86_frame frame;
11849 HOST_WIDE_INT allocate;
11850 unsigned HOST_WIDE_INT args_size;
11851 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11852 rtx scratch_reg = NULL_RTX;
11853 rtx varargs_label = NULL_RTX;
11854 rtx fn;
11856 gcc_assert (flag_split_stack && reload_completed);
11858 ix86_finalize_stack_realign_flags ();
11859 ix86_compute_frame_layout (&frame);
11860 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11862 /* This is the label we will branch to if we have enough stack
11863 space. We expect the basic block reordering pass to reverse this
11864 branch if optimizing, so that we branch in the unlikely case. */
11865 label = gen_label_rtx ();
11867 /* We need to compare the stack pointer minus the frame size with
11868 the stack boundary in the TCB. The stack boundary always gives
11869 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11870 can compare directly. Otherwise we need to do an addition. */
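/* Illustrative sketch, not exact: if the frame needs, say, 128 bytes and
   SPLIT_STACK_AVAILABLE covers that, we compare %rsp directly against the
   TCB stack-limit slot; if it needs 1MB, we first compute
   scratch = %rsp - 1MB in a caller-saved register and compare that value
   against the limit instead.  */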
11872 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11873 UNSPEC_STACK_CHECK);
11874 limit = gen_rtx_CONST (Pmode, limit);
11875 limit = gen_rtx_MEM (Pmode, limit);
11876 if (allocate < SPLIT_STACK_AVAILABLE)
11877 current = stack_pointer_rtx;
11878 else
11880 unsigned int scratch_regno;
11881 rtx offset;
11883 /* We need a scratch register to hold the stack pointer minus
11884 the required frame size. Since this is the very start of the
11885 function, the scratch register can be any caller-saved
11886 register which is not used for parameters. */
11887 offset = GEN_INT (- allocate);
11888 scratch_regno = split_stack_prologue_scratch_regno ();
11889 if (scratch_regno == INVALID_REGNUM)
11890 return;
11891 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11892 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11894 /* We don't use ix86_gen_add3 in this case because it will
11895 want to split to lea, but when not optimizing the insn
11896 will not be split after this point. */
11897 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11898 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11899 offset)));
11901 else
11903 emit_move_insn (scratch_reg, offset);
11904 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11905 stack_pointer_rtx));
11907 current = scratch_reg;
11910 ix86_expand_branch (GEU, current, limit, label);
11911 jump_insn = get_last_insn ();
11912 JUMP_LABEL (jump_insn) = label;
11914 /* Mark the jump as very likely to be taken. */
11915 add_int_reg_note (jump_insn, REG_BR_PROB,
11916 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11918 if (split_stack_fn == NULL_RTX)
11919 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11920 fn = split_stack_fn;
11922 /* Get more stack space. We pass in the desired stack space and the
11923 size of the arguments to copy to the new stack. In 32-bit mode
11924 we push the parameters; __morestack will return on a new stack
11925 anyhow. In 64-bit mode we pass the parameters in r10 and
11926 r11. */
11927 allocate_rtx = GEN_INT (allocate);
11928 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11929 call_fusage = NULL_RTX;
11930 if (TARGET_64BIT)
11932 rtx reg10, reg11;
11934 reg10 = gen_rtx_REG (Pmode, R10_REG);
11935 reg11 = gen_rtx_REG (Pmode, R11_REG);
11937 /* If this function uses a static chain, it will be in %r10.
11938 Preserve it across the call to __morestack. */
11939 if (DECL_STATIC_CHAIN (cfun->decl))
11941 rtx rax;
11943 rax = gen_rtx_REG (word_mode, AX_REG);
11944 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11945 use_reg (&call_fusage, rax);
11948 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11949 && !TARGET_PECOFF)
11951 HOST_WIDE_INT argval;
11953 gcc_assert (Pmode == DImode);
11954 /* When using the large model we need to load the address
11955 into a register, and we've run out of registers. So we
11956 switch to a different calling convention, and we call a
11957 different function: __morestack_large_model. We pass the
11958 argument size in the upper 32 bits of r10 and pass the
11959 frame size in the lower 32 bits. */
11960 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11961 gcc_assert ((args_size & 0xffffffff) == args_size);
11963 if (split_stack_fn_large == NULL_RTX)
11964 split_stack_fn_large =
11965 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11967 if (ix86_cmodel == CM_LARGE_PIC)
11969 rtx label, x;
11971 label = gen_label_rtx ();
11972 emit_label (label);
11973 LABEL_PRESERVE_P (label) = 1;
11974 emit_insn (gen_set_rip_rex64 (reg10, label));
11975 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11976 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11977 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11978 UNSPEC_GOT);
11979 x = gen_rtx_CONST (Pmode, x);
11980 emit_move_insn (reg11, x);
11981 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11982 x = gen_const_mem (Pmode, x);
11983 emit_move_insn (reg11, x);
11985 else
11986 emit_move_insn (reg11, split_stack_fn_large);
11988 fn = reg11;
11990 argval = ((args_size << 16) << 16) + allocate;
11991 emit_move_insn (reg10, GEN_INT (argval));
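/* For example (illustration only): with args_size == 32 and
   allocate == 0x1000 the packed value is
   argval == (32 << 32) + 0x1000 == 0x0000002000001000,
   i.e. the argument size sits in the upper 32 bits of %r10 and the frame
   size in the lower 32 bits, as __morestack_large_model expects.  */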
11993 else
11995 emit_move_insn (reg10, allocate_rtx);
11996 emit_move_insn (reg11, GEN_INT (args_size));
11997 use_reg (&call_fusage, reg11);
12000 use_reg (&call_fusage, reg10);
12002 else
12004 emit_insn (gen_push (GEN_INT (args_size)));
12005 emit_insn (gen_push (allocate_rtx));
12007 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12008 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12009 NULL_RTX, false);
12010 add_function_usage_to (call_insn, call_fusage);
12012 /* In order to make call/return prediction work right, we now need
12013 to execute a return instruction. See
12014 libgcc/config/i386/morestack.S for the details on how this works.
12016 For flow purposes gcc must not see this as a return
12017 instruction--we need control flow to continue at the subsequent
12018 label. Therefore, we use an unspec. */
12019 gcc_assert (crtl->args.pops_args < 65536);
12020 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12022 /* If we are in 64-bit mode and this function uses a static chain,
12023 we saved %r10 in %rax before calling __morestack. */
12024 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12025 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12026 gen_rtx_REG (word_mode, AX_REG));
12028 /* If this function calls va_start, we need to store a pointer to
12029 the arguments on the old stack, because they may not have been
12030 all copied to the new stack. At this point the old stack can be
12031 found at the frame pointer value used by __morestack, because
12032 __morestack has set that up before calling back to us. Here we
12033 store that pointer in a scratch register, and in
12034 ix86_expand_prologue we store the scratch register in a stack
12035 slot. */
12036 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12038 unsigned int scratch_regno;
12039 rtx frame_reg;
12040 int words;
12042 scratch_regno = split_stack_prologue_scratch_regno ();
12043 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12044 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12046 /* 64-bit:
12047 fp -> old fp value
12048 return address within this function
12049 return address of caller of this function
12050 stack arguments
12051 So we add three words to get to the stack arguments.
12053 32-bit:
12054 fp -> old fp value
12055 return address within this function
12056 first argument to __morestack
12057 second argument to __morestack
12058 return address of caller of this function
12059 stack arguments
12060 So we add five words to get to the stack arguments.
12062 words = TARGET_64BIT ? 3 : 5;
12063 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12064 gen_rtx_PLUS (Pmode, frame_reg,
12065 GEN_INT (words * UNITS_PER_WORD))));
12067 varargs_label = gen_label_rtx ();
12068 emit_jump_insn (gen_jump (varargs_label));
12069 JUMP_LABEL (get_last_insn ()) = varargs_label;
12071 emit_barrier ();
12074 emit_label (label);
12075 LABEL_NUSES (label) = 1;
12077 /* If this function calls va_start, we now have to set the scratch
12078 register for the case where we do not call __morestack. In this
12079 case we need to set it based on the stack pointer. */
12080 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12082 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12083 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12084 GEN_INT (UNITS_PER_WORD))));
12086 emit_label (varargs_label);
12087 LABEL_NUSES (varargs_label) = 1;
12091 /* We may have to tell the dataflow pass that the split stack prologue
12092 is initializing a scratch register. */
12094 static void
12095 ix86_live_on_entry (bitmap regs)
12097 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12099 gcc_assert (flag_split_stack);
12100 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12104 /* Extract the parts of an RTL expression that is a valid memory address
12105 for an instruction. Return 0 if the structure of the address is
12106 grossly off. Return -1 if the address contains ASHIFT, so it is not
12107 strictly valid, but is still used for computing the length of an lea instruction. */
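/* For instance (illustration only), the address

	(plus:DI (reg:DI bx)
		 (plus:DI (mult:DI (reg:DI cx) (const_int 4))
			  (const_int 12)))

   decomposes into base = %rbx, index = %rcx, scale = 4, disp = 12,
   i.e. the operand 12(%rbx,%rcx,4) in AT&T syntax.  */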
12110 ix86_decompose_address (rtx addr, struct ix86_address *out)
12112 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12113 rtx base_reg, index_reg;
12114 HOST_WIDE_INT scale = 1;
12115 rtx scale_rtx = NULL_RTX;
12116 rtx tmp;
12117 int retval = 1;
12118 enum ix86_address_seg seg = SEG_DEFAULT;
12120 /* Allow zero-extended SImode addresses,
12121 they will be emitted with addr32 prefix. */
12122 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12124 if (GET_CODE (addr) == ZERO_EXTEND
12125 && GET_MODE (XEXP (addr, 0)) == SImode)
12127 addr = XEXP (addr, 0);
12128 if (CONST_INT_P (addr))
12129 return 0;
12131 else if (GET_CODE (addr) == AND
12132 && const_32bit_mask (XEXP (addr, 1), DImode))
12134 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12135 if (addr == NULL_RTX)
12136 return 0;
12138 if (CONST_INT_P (addr))
12139 return 0;
12143 /* Allow SImode subregs of DImode addresses,
12144 they will be emitted with addr32 prefix. */
12145 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12147 if (GET_CODE (addr) == SUBREG
12148 && GET_MODE (SUBREG_REG (addr)) == DImode)
12150 addr = SUBREG_REG (addr);
12151 if (CONST_INT_P (addr))
12152 return 0;
12156 if (REG_P (addr))
12157 base = addr;
12158 else if (GET_CODE (addr) == SUBREG)
12160 if (REG_P (SUBREG_REG (addr)))
12161 base = addr;
12162 else
12163 return 0;
12165 else if (GET_CODE (addr) == PLUS)
12167 rtx addends[4], op;
12168 int n = 0, i;
12170 op = addr;
12173 if (n >= 4)
12174 return 0;
12175 addends[n++] = XEXP (op, 1);
12176 op = XEXP (op, 0);
12178 while (GET_CODE (op) == PLUS);
12179 if (n >= 4)
12180 return 0;
12181 addends[n] = op;
12183 for (i = n; i >= 0; --i)
12185 op = addends[i];
12186 switch (GET_CODE (op))
12188 case MULT:
12189 if (index)
12190 return 0;
12191 index = XEXP (op, 0);
12192 scale_rtx = XEXP (op, 1);
12193 break;
12195 case ASHIFT:
12196 if (index)
12197 return 0;
12198 index = XEXP (op, 0);
12199 tmp = XEXP (op, 1);
12200 if (!CONST_INT_P (tmp))
12201 return 0;
12202 scale = INTVAL (tmp);
12203 if ((unsigned HOST_WIDE_INT) scale > 3)
12204 return 0;
12205 scale = 1 << scale;
12206 break;
12208 case ZERO_EXTEND:
12209 op = XEXP (op, 0);
12210 if (GET_CODE (op) != UNSPEC)
12211 return 0;
12212 /* FALLTHRU */
12214 case UNSPEC:
12215 if (XINT (op, 1) == UNSPEC_TP
12216 && TARGET_TLS_DIRECT_SEG_REFS
12217 && seg == SEG_DEFAULT)
12218 seg = DEFAULT_TLS_SEG_REG;
12219 else
12220 return 0;
12221 break;
12223 case SUBREG:
12224 if (!REG_P (SUBREG_REG (op)))
12225 return 0;
12226 /* FALLTHRU */
12228 case REG:
12229 if (!base)
12230 base = op;
12231 else if (!index)
12232 index = op;
12233 else
12234 return 0;
12235 break;
12237 case CONST:
12238 case CONST_INT:
12239 case SYMBOL_REF:
12240 case LABEL_REF:
12241 if (disp)
12242 return 0;
12243 disp = op;
12244 break;
12246 default:
12247 return 0;
12251 else if (GET_CODE (addr) == MULT)
12253 index = XEXP (addr, 0); /* index*scale */
12254 scale_rtx = XEXP (addr, 1);
12256 else if (GET_CODE (addr) == ASHIFT)
12258 /* We're called for lea too, which implements ashift on occasion. */
12259 index = XEXP (addr, 0);
12260 tmp = XEXP (addr, 1);
12261 if (!CONST_INT_P (tmp))
12262 return 0;
12263 scale = INTVAL (tmp);
12264 if ((unsigned HOST_WIDE_INT) scale > 3)
12265 return 0;
12266 scale = 1 << scale;
12267 retval = -1;
12269 else
12270 disp = addr; /* displacement */
12272 if (index)
12274 if (REG_P (index))
12276 else if (GET_CODE (index) == SUBREG
12277 && REG_P (SUBREG_REG (index)))
12279 else
12280 return 0;
12283 /* Extract the integral value of scale. */
12284 if (scale_rtx)
12286 if (!CONST_INT_P (scale_rtx))
12287 return 0;
12288 scale = INTVAL (scale_rtx);
12291 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12292 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12294 /* Avoid useless 0 displacement. */
12295 if (disp == const0_rtx && (base || index))
12296 disp = NULL_RTX;
12298 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12299 if (base_reg && index_reg && scale == 1
12300 && (index_reg == arg_pointer_rtx
12301 || index_reg == frame_pointer_rtx
12302 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12304 rtx tmp;
12305 tmp = base, base = index, index = tmp;
12306 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12309 /* Special case: %ebp cannot be encoded as a base without a displacement.
12310 Similarly %r13. */
12311 if (!disp
12312 && base_reg
12313 && (base_reg == hard_frame_pointer_rtx
12314 || base_reg == frame_pointer_rtx
12315 || base_reg == arg_pointer_rtx
12316 || (REG_P (base_reg)
12317 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12318 || REGNO (base_reg) == R13_REG))))
12319 disp = const0_rtx;
12321 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12322 Avoid this by transforming to [%esi+0].
12323 Reload calls address legitimization without cfun defined, so we need
12324 to test cfun for being non-NULL. */
12325 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12326 && base_reg && !index_reg && !disp
12327 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12328 disp = const0_rtx;
12330 /* Special case: encode reg+reg instead of reg*2. */
12331 if (!base && index && scale == 2)
12332 base = index, base_reg = index_reg, scale = 1;
12334 /* Special case: scaling cannot be encoded without base or displacement. */
12335 if (!base && !disp && index && scale != 1)
12336 disp = const0_rtx;
12338 out->base = base;
12339 out->index = index;
12340 out->disp = disp;
12341 out->scale = scale;
12342 out->seg = seg;
12344 return retval;
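/* Illustrative sketch (an annotation, not part of the port): callers such as
   ix86_address_cost and ix86_legitimate_address_p below consume the
   decomposition roughly like this (ADDR stands for any address rtx):

     struct ix86_address parts;

     if (ix86_decompose_address (addr, &parts) <= 0)
       return false;

   where 0 means the address is structurally invalid and -1 marks an ASHIFT
   form that is acceptable only for computing lea lengths; on success
   parts.base, parts.index, parts.scale, parts.disp and parts.seg describe
   the base + index*scale + disp (+ segment) form of ADDR.  */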
12347 /* Return cost of the memory address x.
12348 For i386, it is better to use a complex address than let gcc copy
12349 the address into a reg and make a new pseudo. But not if the address
12350 requires two regs - that would mean more pseudos with longer
12351 lifetimes. */
12352 static int
12353 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12354 addr_space_t as ATTRIBUTE_UNUSED,
12355 bool speed ATTRIBUTE_UNUSED)
12357 struct ix86_address parts;
12358 int cost = 1;
12359 int ok = ix86_decompose_address (x, &parts);
12361 gcc_assert (ok);
12363 if (parts.base && GET_CODE (parts.base) == SUBREG)
12364 parts.base = SUBREG_REG (parts.base);
12365 if (parts.index && GET_CODE (parts.index) == SUBREG)
12366 parts.index = SUBREG_REG (parts.index);
12368 /* Attempt to minimize number of registers in the address. */
12369 if ((parts.base
12370 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12371 || (parts.index
12372 && (!REG_P (parts.index)
12373 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12374 cost++;
12376 if (parts.base
12377 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12378 && parts.index
12379 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12380 && parts.base != parts.index)
12381 cost++;
12383 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12384 since its predecode logic can't detect the length of such instructions
12385 and they degenerate to vector decoding. Increase the cost of such
12386 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12387 to split such addresses or even to refuse them entirely.
12389 The following addressing modes are affected:
12390 [base+scale*index]
12391 [scale*index+disp]
12392 [base+index]
12394 The first and last cases may be avoidable by explicitly coding a zero
12395 displacement into the memory address, but I don't have an AMD K6 machine
12396 handy to check this theory. */
12398 if (TARGET_K6
12399 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12400 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12401 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12402 cost += 10;
12404 return cost;
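/* Illustrative examples (an annotation, not part of the port) for the K6
   penalty above: when tuning for K6, an address such as [%esi+%edi*2]
   (base plus scaled index, no displacement) or [%edi*2+4] (scaled index plus
   displacement, no base) falls into the penalized 00_xxx_100b ModR/M forms
   and gets cost += 10, while a simple [%esi+4] does not.  */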
12407 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12408 this is used to form addresses to local data when -fPIC is in
12409 use. */
12411 static bool
12412 darwin_local_data_pic (rtx disp)
12414 return (GET_CODE (disp) == UNSPEC
12415 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12418 /* Determine if a given RTX is a valid constant. We already know this
12419 satisfies CONSTANT_P. */
12421 static bool
12422 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12424 switch (GET_CODE (x))
12426 case CONST:
12427 x = XEXP (x, 0);
12429 if (GET_CODE (x) == PLUS)
12431 if (!CONST_INT_P (XEXP (x, 1)))
12432 return false;
12433 x = XEXP (x, 0);
12436 if (TARGET_MACHO && darwin_local_data_pic (x))
12437 return true;
12439 /* Only some unspecs are valid as "constants". */
12440 if (GET_CODE (x) == UNSPEC)
12441 switch (XINT (x, 1))
12443 case UNSPEC_GOT:
12444 case UNSPEC_GOTOFF:
12445 case UNSPEC_PLTOFF:
12446 return TARGET_64BIT;
12447 case UNSPEC_TPOFF:
12448 case UNSPEC_NTPOFF:
12449 x = XVECEXP (x, 0, 0);
12450 return (GET_CODE (x) == SYMBOL_REF
12451 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12452 case UNSPEC_DTPOFF:
12453 x = XVECEXP (x, 0, 0);
12454 return (GET_CODE (x) == SYMBOL_REF
12455 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12456 default:
12457 return false;
12460 /* We must have drilled down to a symbol. */
12461 if (GET_CODE (x) == LABEL_REF)
12462 return true;
12463 if (GET_CODE (x) != SYMBOL_REF)
12464 return false;
12465 /* FALLTHRU */
12467 case SYMBOL_REF:
12468 /* TLS symbols are never valid. */
12469 if (SYMBOL_REF_TLS_MODEL (x))
12470 return false;
12472 /* DLLIMPORT symbols are never valid. */
12473 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12474 && SYMBOL_REF_DLLIMPORT_P (x))
12475 return false;
12477 #if TARGET_MACHO
12478 /* mdynamic-no-pic */
12479 if (MACHO_DYNAMIC_NO_PIC_P)
12480 return machopic_symbol_defined_p (x);
12481 #endif
12482 break;
12484 case CONST_DOUBLE:
12485 if (GET_MODE (x) == TImode
12486 && x != CONST0_RTX (TImode)
12487 && !TARGET_64BIT)
12488 return false;
12489 break;
12491 case CONST_VECTOR:
12492 if (!standard_sse_constant_p (x))
12493 return false;
12495 default:
12496 break;
12499 /* Otherwise we handle everything else in the move patterns. */
12500 return true;
12503 /* Determine if it's legal to put X into the constant pool. This
12504 is not possible for the address of thread-local symbols, which
12505 is checked above. */
12507 static bool
12508 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12510 /* We can always put integral constants and vectors in memory. */
12511 switch (GET_CODE (x))
12513 case CONST_INT:
12514 case CONST_DOUBLE:
12515 case CONST_VECTOR:
12516 return false;
12518 default:
12519 break;
12521 return !ix86_legitimate_constant_p (mode, x);
12524 /* Return true if the symbol is marked as dllimport, or as a stub variable,
12525 otherwise false. */
12527 static bool
12528 is_imported_p (rtx x)
12530 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12531 || GET_CODE (x) != SYMBOL_REF)
12532 return false;
12534 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12538 /* Return true if the constant value X is a legitimate general operand
12539 when generating PIC code. It is given that flag_pic is on and
12540 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12542 bool
12543 legitimate_pic_operand_p (rtx x)
12545 rtx inner;
12547 switch (GET_CODE (x))
12549 case CONST:
12550 inner = XEXP (x, 0);
12551 if (GET_CODE (inner) == PLUS
12552 && CONST_INT_P (XEXP (inner, 1)))
12553 inner = XEXP (inner, 0);
12555 /* Only some unspecs are valid as "constants". */
12556 if (GET_CODE (inner) == UNSPEC)
12557 switch (XINT (inner, 1))
12559 case UNSPEC_GOT:
12560 case UNSPEC_GOTOFF:
12561 case UNSPEC_PLTOFF:
12562 return TARGET_64BIT;
12563 case UNSPEC_TPOFF:
12564 x = XVECEXP (inner, 0, 0);
12565 return (GET_CODE (x) == SYMBOL_REF
12566 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12567 case UNSPEC_MACHOPIC_OFFSET:
12568 return legitimate_pic_address_disp_p (x);
12569 default:
12570 return false;
12572 /* FALLTHRU */
12574 case SYMBOL_REF:
12575 case LABEL_REF:
12576 return legitimate_pic_address_disp_p (x);
12578 default:
12579 return true;
12583 /* Determine if a given CONST RTX is a valid memory displacement
12584 in PIC mode. */
12586 bool
12587 legitimate_pic_address_disp_p (rtx disp)
12589 bool saw_plus;
12591 /* In 64bit mode we can allow direct addresses of symbols and labels
12592 when they are not dynamic symbols. */
12593 if (TARGET_64BIT)
12595 rtx op0 = disp, op1;
12597 switch (GET_CODE (disp))
12599 case LABEL_REF:
12600 return true;
12602 case CONST:
12603 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12604 break;
12605 op0 = XEXP (XEXP (disp, 0), 0);
12606 op1 = XEXP (XEXP (disp, 0), 1);
12607 if (!CONST_INT_P (op1)
12608 || INTVAL (op1) >= 16*1024*1024
12609 || INTVAL (op1) < -16*1024*1024)
12610 break;
12611 if (GET_CODE (op0) == LABEL_REF)
12612 return true;
12613 if (GET_CODE (op0) == CONST
12614 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12615 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12616 return true;
12617 if (GET_CODE (op0) == UNSPEC
12618 && XINT (op0, 1) == UNSPEC_PCREL)
12619 return true;
12620 if (GET_CODE (op0) != SYMBOL_REF)
12621 break;
12622 /* FALLTHRU */
12624 case SYMBOL_REF:
12625 /* TLS references should always be enclosed in UNSPEC.
12626 A dllimported symbol always needs to be resolved. */
12627 if (SYMBOL_REF_TLS_MODEL (op0)
12628 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12629 return false;
12631 if (TARGET_PECOFF)
12633 if (is_imported_p (op0))
12634 return true;
12636 if (SYMBOL_REF_FAR_ADDR_P (op0)
12637 || !SYMBOL_REF_LOCAL_P (op0))
12638 break;
12640 /* Function symbols need to be resolved only for
12641 the large model.
12642 For the small model we don't need to resolve anything
12643 here. */
12644 if ((ix86_cmodel != CM_LARGE_PIC
12645 && SYMBOL_REF_FUNCTION_P (op0))
12646 || ix86_cmodel == CM_SMALL_PIC)
12647 return true;
12648 /* Non-external symbols don't need to be resolved for
12649 the large and medium models. */
12650 if ((ix86_cmodel == CM_LARGE_PIC
12651 || ix86_cmodel == CM_MEDIUM_PIC)
12652 && !SYMBOL_REF_EXTERNAL_P (op0))
12653 return true;
12655 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12656 && SYMBOL_REF_LOCAL_P (op0)
12657 && ix86_cmodel != CM_LARGE_PIC)
12658 return true;
12659 break;
12661 default:
12662 break;
12665 if (GET_CODE (disp) != CONST)
12666 return false;
12667 disp = XEXP (disp, 0);
12669 if (TARGET_64BIT)
12671 /* It is not safe to allow PLUS expressions; they could exceed the allowed
12672 distance of GOT tables. We should not need these anyway. */
12673 if (GET_CODE (disp) != UNSPEC
12674 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12675 && XINT (disp, 1) != UNSPEC_GOTOFF
12676 && XINT (disp, 1) != UNSPEC_PCREL
12677 && XINT (disp, 1) != UNSPEC_PLTOFF))
12678 return false;
12680 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12681 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12682 return false;
12683 return true;
12686 saw_plus = false;
12687 if (GET_CODE (disp) == PLUS)
12689 if (!CONST_INT_P (XEXP (disp, 1)))
12690 return false;
12691 disp = XEXP (disp, 0);
12692 saw_plus = true;
12695 if (TARGET_MACHO && darwin_local_data_pic (disp))
12696 return true;
12698 if (GET_CODE (disp) != UNSPEC)
12699 return false;
12701 switch (XINT (disp, 1))
12703 case UNSPEC_GOT:
12704 if (saw_plus)
12705 return false;
12706 /* We need to check for both symbols and labels because VxWorks loads
12707 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12708 details. */
12709 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12710 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12711 case UNSPEC_GOTOFF:
12712 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12713 While the ABI also specifies a 32bit relocation, we don't produce it in
12714 the small PIC model at all. */
12715 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12716 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12717 && !TARGET_64BIT)
12718 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12719 return false;
12720 case UNSPEC_GOTTPOFF:
12721 case UNSPEC_GOTNTPOFF:
12722 case UNSPEC_INDNTPOFF:
12723 if (saw_plus)
12724 return false;
12725 disp = XVECEXP (disp, 0, 0);
12726 return (GET_CODE (disp) == SYMBOL_REF
12727 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12728 case UNSPEC_NTPOFF:
12729 disp = XVECEXP (disp, 0, 0);
12730 return (GET_CODE (disp) == SYMBOL_REF
12731 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12732 case UNSPEC_DTPOFF:
12733 disp = XVECEXP (disp, 0, 0);
12734 return (GET_CODE (disp) == SYMBOL_REF
12735 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12738 return false;
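/* Illustrative examples (an annotation, not part of the port): in 32-bit PIC
   code a displacement such as

     (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTOFF))

   is accepted above (when gotoff_operand allows the symbol), whereas a bare
   (symbol_ref ("foo")) is rejected, and TLS references are accepted only when
   wrapped in the matching TLS unspec, e.g. UNSPEC_NTPOFF for a local-exec
   symbol.  */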
12741 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reload the invalid
12742 parts of the memory address X by pushing reloads for them. Return true
12743 if something was reloaded, in which case the calling macro should goto
12744 WIN; return false if X should be handled in the default way. */
12746 bool
12747 ix86_legitimize_reload_address (rtx x,
12748 enum machine_mode mode ATTRIBUTE_UNUSED,
12749 int opnum, int type,
12750 int ind_levels ATTRIBUTE_UNUSED)
12752 /* Reload can generate:
12754 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12755 (reg:DI 97))
12756 (reg:DI 2 cx))
12758 This RTX is rejected by ix86_legitimate_address_p due to
12759 the non-strictness of base register 97. Following this rejection,
12760 reload pushes all three components into separate registers,
12761 creating an invalid memory address RTX.
12763 The following code reloads only the invalid parts of the
12764 memory address RTX. */
12766 if (GET_CODE (x) == PLUS
12767 && REG_P (XEXP (x, 1))
12768 && GET_CODE (XEXP (x, 0)) == PLUS
12769 && REG_P (XEXP (XEXP (x, 0), 1)))
12771 rtx base, index;
12772 bool something_reloaded = false;
12774 base = XEXP (XEXP (x, 0), 1);
12775 if (!REG_OK_FOR_BASE_STRICT_P (base))
12777 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12778 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12779 opnum, (enum reload_type) type);
12780 something_reloaded = true;
12783 index = XEXP (x, 1);
12784 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12786 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12787 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12788 opnum, (enum reload_type) type);
12789 something_reloaded = true;
12792 gcc_assert (something_reloaded);
12793 return true;
12796 return false;
12799 /* Determine if OP is a suitable RTX for an address register.
12800 Return the naked register if a register or a register subreg is
12801 found, otherwise return NULL_RTX. */
12803 static rtx
12804 ix86_validate_address_register (rtx op)
12806 enum machine_mode mode = GET_MODE (op);
12808 /* Only SImode or DImode registers can form the address. */
12809 if (mode != SImode && mode != DImode)
12810 return NULL_RTX;
12812 if (REG_P (op))
12813 return op;
12814 else if (GET_CODE (op) == SUBREG)
12816 rtx reg = SUBREG_REG (op);
12818 if (!REG_P (reg))
12819 return NULL_RTX;
12821 mode = GET_MODE (reg);
12823 /* Don't allow SUBREGs that span more than a word. It can
12824 lead to spill failures when the register is one word out
12825 of a two word structure. */
12826 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12827 return NULL_RTX;
12829 /* Allow only SUBREGs of non-eliminable hard registers. */
12830 if (register_no_elim_operand (reg, mode))
12831 return reg;
12834 /* Op is not a register. */
12835 return NULL_RTX;
12838 /* Recognizes RTL expressions that are valid memory addresses for an
12839 instruction. The MODE argument is the machine mode for the MEM
12840 expression that wants to use this address.
12842 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12843 convert common non-canonical forms to canonical form so that they will
12844 be recognized. */
12846 static bool
12847 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12848 rtx addr, bool strict)
12850 struct ix86_address parts;
12851 rtx base, index, disp;
12852 HOST_WIDE_INT scale;
12853 enum ix86_address_seg seg;
12855 if (ix86_decompose_address (addr, &parts) <= 0)
12856 /* Decomposition failed. */
12857 return false;
12859 base = parts.base;
12860 index = parts.index;
12861 disp = parts.disp;
12862 scale = parts.scale;
12863 seg = parts.seg;
12865 /* Validate base register. */
12866 if (base)
12868 rtx reg = ix86_validate_address_register (base);
12870 if (reg == NULL_RTX)
12871 return false;
12873 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12874 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12875 /* Base is not valid. */
12876 return false;
12879 /* Validate index register. */
12880 if (index)
12882 rtx reg = ix86_validate_address_register (index);
12884 if (reg == NULL_RTX)
12885 return false;
12887 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12888 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12889 /* Index is not valid. */
12890 return false;
12893 /* Index and base should have the same mode. */
12894 if (base && index
12895 && GET_MODE (base) != GET_MODE (index))
12896 return false;
12898 /* Address override works only on the (%reg) part of %fs:(%reg). */
12899 if (seg != SEG_DEFAULT
12900 && ((base && GET_MODE (base) != word_mode)
12901 || (index && GET_MODE (index) != word_mode)))
12902 return false;
12904 /* Validate scale factor. */
12905 if (scale != 1)
12907 if (!index)
12908 /* Scale without index. */
12909 return false;
12911 if (scale != 2 && scale != 4 && scale != 8)
12912 /* Scale is not a valid multiplier. */
12913 return false;
12916 /* Validate displacement. */
12917 if (disp)
12919 if (GET_CODE (disp) == CONST
12920 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12921 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12922 switch (XINT (XEXP (disp, 0), 1))
12924 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12925 used. While the ABI also specifies 32bit relocations, we don't produce
12926 them at all and use IP-relative addressing instead. */
12927 case UNSPEC_GOT:
12928 case UNSPEC_GOTOFF:
12929 gcc_assert (flag_pic);
12930 if (!TARGET_64BIT)
12931 goto is_legitimate_pic;
12933 /* 64bit address unspec. */
12934 return false;
12936 case UNSPEC_GOTPCREL:
12937 case UNSPEC_PCREL:
12938 gcc_assert (flag_pic);
12939 goto is_legitimate_pic;
12941 case UNSPEC_GOTTPOFF:
12942 case UNSPEC_GOTNTPOFF:
12943 case UNSPEC_INDNTPOFF:
12944 case UNSPEC_NTPOFF:
12945 case UNSPEC_DTPOFF:
12946 break;
12948 case UNSPEC_STACK_CHECK:
12949 gcc_assert (flag_split_stack);
12950 break;
12952 default:
12953 /* Invalid address unspec. */
12954 return false;
12957 else if (SYMBOLIC_CONST (disp)
12958 && (flag_pic
12959 || (TARGET_MACHO
12960 #if TARGET_MACHO
12961 && MACHOPIC_INDIRECT
12962 && !machopic_operand_p (disp)
12963 #endif
12967 is_legitimate_pic:
12968 if (TARGET_64BIT && (index || base))
12970 /* foo@dtpoff(%rX) is ok. */
12971 if (GET_CODE (disp) != CONST
12972 || GET_CODE (XEXP (disp, 0)) != PLUS
12973 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12974 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12975 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12976 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12977 /* Non-constant pic memory reference. */
12978 return false;
12980 else if ((!TARGET_MACHO || flag_pic)
12981 && ! legitimate_pic_address_disp_p (disp))
12982 /* Displacement is an invalid pic construct. */
12983 return false;
12984 #if TARGET_MACHO
12985 else if (MACHO_DYNAMIC_NO_PIC_P
12986 && !ix86_legitimate_constant_p (Pmode, disp))
12987 /* displacement must be referenced via non_lazy_pointer */
12988 return false;
12989 #endif
12991 /* This code used to verify that a symbolic pic displacement
12992 includes the pic_offset_table_rtx register.
12994 While this is a good idea, unfortunately these constructs may
12995 be created by the "adds using lea" optimization for incorrect
12996 code like:
12998 int a;
12999 int foo (int i)
13001 { return *(&a + i); }
13004 This code is nonsensical, but results in addressing the
13005 GOT table with a pic_offset_table_rtx base. We can't
13006 just refuse it easily, since it gets matched by the
13007 "addsi3" pattern, which later gets split to lea when the
13008 output register differs from the input. While this
13009 can be handled by a separate addsi pattern for this case
13010 that never results in lea, disabling this test seems to be the
13011 easier and correct fix for the crash. */
13013 else if (GET_CODE (disp) != LABEL_REF
13014 && !CONST_INT_P (disp)
13015 && (GET_CODE (disp) != CONST
13016 || !ix86_legitimate_constant_p (Pmode, disp))
13017 && (GET_CODE (disp) != SYMBOL_REF
13018 || !ix86_legitimate_constant_p (Pmode, disp)))
13019 /* Displacement is not constant. */
13020 return false;
13021 else if (TARGET_64BIT
13022 && !x86_64_immediate_operand (disp, VOIDmode))
13023 /* Displacement is out of range. */
13024 return false;
13025 /* In x32 mode, constant addresses are sign extended to 64bit, so
13026 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
13027 else if (TARGET_X32 && !(index || base)
13028 && CONST_INT_P (disp)
13029 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13030 return false;
13033 /* Everything looks valid. */
13034 return true;
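/* Illustrative examples (an annotation, not part of the port): under the
   checks above an address such as

     (plus:SI (mult:SI (reg:SI 1) (const_int 4)) (reg:SI 2))

   is legitimate, while the same form with (const_int 3) as the multiplier is
   not, since only scale factors of 1, 2, 4 and 8 can be encoded.  */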
13037 /* Determine if a given RTX is a valid constant address. */
13039 bool
13040 constant_address_p (rtx x)
13042 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13045 /* Return a unique alias set for the GOT. */
13047 static alias_set_type
13048 ix86_GOT_alias_set (void)
13050 static alias_set_type set = -1;
13051 if (set == -1)
13052 set = new_alias_set ();
13053 return set;
13056 /* Return a legitimate reference for ORIG (an address) using the
13057 register REG. If REG is 0, a new pseudo is generated.
13059 There are two types of references that must be handled:
13061 1. Global data references must load the address from the GOT, via
13062 the PIC reg. An insn is emitted to do this load, and the reg is
13063 returned.
13065 2. Static data references, constant pool addresses, and code labels
13066 compute the address as an offset from the GOT, whose base is in
13067 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13068 differentiate them from global data objects. The returned
13069 address is the PIC reg + an unspec constant.
13071 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13072 reg also appears in the address. */
13074 static rtx
13075 legitimize_pic_address (rtx orig, rtx reg)
13077 rtx addr = orig;
13078 rtx new_rtx = orig;
13080 #if TARGET_MACHO
13081 if (TARGET_MACHO && !TARGET_64BIT)
13083 if (reg == 0)
13084 reg = gen_reg_rtx (Pmode);
13085 /* Use the generic Mach-O PIC machinery. */
13086 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13088 #endif
13090 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13092 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13093 if (tmp)
13094 return tmp;
13097 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13098 new_rtx = addr;
13099 else if (TARGET_64BIT && !TARGET_PECOFF
13100 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13102 rtx tmpreg;
13103 /* This symbol may be referenced via a displacement from the PIC
13104 base address (@GOTOFF). */
13106 if (reload_in_progress)
13107 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13108 if (GET_CODE (addr) == CONST)
13109 addr = XEXP (addr, 0);
13110 if (GET_CODE (addr) == PLUS)
13112 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13113 UNSPEC_GOTOFF);
13114 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13116 else
13117 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13118 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13119 if (!reg)
13120 tmpreg = gen_reg_rtx (Pmode);
13121 else
13122 tmpreg = reg;
13123 emit_move_insn (tmpreg, new_rtx);
13125 if (reg != 0)
13127 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13128 tmpreg, 1, OPTAB_DIRECT);
13129 new_rtx = reg;
13131 else
13132 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13134 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13136 /* This symbol may be referenced via a displacement from the PIC
13137 base address (@GOTOFF). */
13139 if (reload_in_progress)
13140 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13141 if (GET_CODE (addr) == CONST)
13142 addr = XEXP (addr, 0);
13143 if (GET_CODE (addr) == PLUS)
13145 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13146 UNSPEC_GOTOFF);
13147 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13149 else
13150 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13151 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13152 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13154 if (reg != 0)
13156 emit_move_insn (reg, new_rtx);
13157 new_rtx = reg;
13160 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13161 /* We can't use @GOTOFF for text labels on VxWorks;
13162 see gotoff_operand. */
13163 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13165 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13166 if (tmp)
13167 return tmp;
13169 /* For x64 PE-COFF there is no GOT table, so we use the address
13170 directly. */
13171 if (TARGET_64BIT && TARGET_PECOFF)
13173 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13174 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13176 if (reg == 0)
13177 reg = gen_reg_rtx (Pmode);
13178 emit_move_insn (reg, new_rtx);
13179 new_rtx = reg;
13181 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13183 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13184 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13185 new_rtx = gen_const_mem (Pmode, new_rtx);
13186 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13188 if (reg == 0)
13189 reg = gen_reg_rtx (Pmode);
13190 /* Use gen_movsi directly; otherwise the address is loaded
13191 into a register for CSE. We don't want to CSE these addresses;
13192 instead we CSE addresses from the GOT table, so skip this. */
13193 emit_insn (gen_movsi (reg, new_rtx));
13194 new_rtx = reg;
13196 else
13198 /* This symbol must be referenced via a load from the
13199 Global Offset Table (@GOT). */
13201 if (reload_in_progress)
13202 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13203 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13204 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13205 if (TARGET_64BIT)
13206 new_rtx = force_reg (Pmode, new_rtx);
13207 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13208 new_rtx = gen_const_mem (Pmode, new_rtx);
13209 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13211 if (reg == 0)
13212 reg = gen_reg_rtx (Pmode);
13213 emit_move_insn (reg, new_rtx);
13214 new_rtx = reg;
13217 else
13219 if (CONST_INT_P (addr)
13220 && !x86_64_immediate_operand (addr, VOIDmode))
13222 if (reg)
13224 emit_move_insn (reg, addr);
13225 new_rtx = reg;
13227 else
13228 new_rtx = force_reg (Pmode, addr);
13230 else if (GET_CODE (addr) == CONST)
13232 addr = XEXP (addr, 0);
13234 /* We must match stuff we generate before. Assume the only
13235 unspecs that can get here are ours. Not that we could do
13236 anything with them anyway.... */
13237 if (GET_CODE (addr) == UNSPEC
13238 || (GET_CODE (addr) == PLUS
13239 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13240 return orig;
13241 gcc_assert (GET_CODE (addr) == PLUS);
13243 if (GET_CODE (addr) == PLUS)
13245 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13247 /* Check first to see if this is a constant offset from a @GOTOFF
13248 symbol reference. */
13249 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13250 && CONST_INT_P (op1))
13252 if (!TARGET_64BIT)
13254 if (reload_in_progress)
13255 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13256 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13257 UNSPEC_GOTOFF);
13258 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13259 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13260 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13262 if (reg != 0)
13264 emit_move_insn (reg, new_rtx);
13265 new_rtx = reg;
13268 else
13270 if (INTVAL (op1) < -16*1024*1024
13271 || INTVAL (op1) >= 16*1024*1024)
13273 if (!x86_64_immediate_operand (op1, Pmode))
13274 op1 = force_reg (Pmode, op1);
13275 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13279 else
13281 rtx base = legitimize_pic_address (op0, reg);
13282 enum machine_mode mode = GET_MODE (base);
13283 new_rtx
13284 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13286 if (CONST_INT_P (new_rtx))
13288 if (INTVAL (new_rtx) < -16*1024*1024
13289 || INTVAL (new_rtx) >= 16*1024*1024)
13291 if (!x86_64_immediate_operand (new_rtx, mode))
13292 new_rtx = force_reg (mode, new_rtx);
13293 new_rtx
13294 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13296 else
13297 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13299 else
13301 if (GET_CODE (new_rtx) == PLUS
13302 && CONSTANT_P (XEXP (new_rtx, 1)))
13304 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13305 new_rtx = XEXP (new_rtx, 1);
13307 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13312 return new_rtx;
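/* Illustrative sketch (an annotation, not part of the port): for 32-bit PIC
   the two main cases above produce RTL roughly along these lines.  A global
   symbol is loaded through the GOT:

     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref ("glob"))] UNSPEC_GOT))))

   while a local symbol is addressed as an offset from the GOT base:

     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref ("loc"))] UNSPEC_GOTOFF)))

   which the assembler eventually sees as glob@GOT(%ebx) and loc@GOTOFF(%ebx)
   respectively.  */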
13315 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13317 static rtx
13318 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13320 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13322 if (GET_MODE (tp) != tp_mode)
13324 gcc_assert (GET_MODE (tp) == SImode);
13325 gcc_assert (tp_mode == DImode);
13327 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13330 if (to_reg)
13331 tp = copy_to_mode_reg (tp_mode, tp);
13333 return tp;
13336 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13338 static GTY(()) rtx ix86_tls_symbol;
13340 static rtx
13341 ix86_tls_get_addr (void)
13343 if (!ix86_tls_symbol)
13345 const char *sym
13346 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13347 ? "___tls_get_addr" : "__tls_get_addr");
13349 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13352 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13354 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13355 UNSPEC_PLTOFF);
13356 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13357 gen_rtx_CONST (Pmode, unspec));
13360 return ix86_tls_symbol;
13363 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13365 static GTY(()) rtx ix86_tls_module_base_symbol;
13368 ix86_tls_module_base (void)
13370 if (!ix86_tls_module_base_symbol)
13372 ix86_tls_module_base_symbol
13373 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13375 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13376 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13379 return ix86_tls_module_base_symbol;
13382 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13383 false if we expect this to be used for a memory address and true if
13384 we expect to load the address into a register. */
13386 static rtx
13387 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13389 rtx dest, base, off;
13390 rtx pic = NULL_RTX, tp = NULL_RTX;
13391 enum machine_mode tp_mode = Pmode;
13392 int type;
13394 /* Fall back to the global dynamic model if the toolchain cannot support
13395 local dynamic. */
13396 if (TARGET_SUN_TLS && !TARGET_64BIT
13397 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13398 && model == TLS_MODEL_LOCAL_DYNAMIC)
13399 model = TLS_MODEL_GLOBAL_DYNAMIC;
13401 switch (model)
13403 case TLS_MODEL_GLOBAL_DYNAMIC:
13404 dest = gen_reg_rtx (Pmode);
13406 if (!TARGET_64BIT)
13408 if (flag_pic && !TARGET_PECOFF)
13409 pic = pic_offset_table_rtx;
13410 else
13412 pic = gen_reg_rtx (Pmode);
13413 emit_insn (gen_set_got (pic));
13417 if (TARGET_GNU2_TLS)
13419 if (TARGET_64BIT)
13420 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13421 else
13422 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13424 tp = get_thread_pointer (Pmode, true);
13425 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13427 if (GET_MODE (x) != Pmode)
13428 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13430 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13432 else
13434 rtx caddr = ix86_tls_get_addr ();
13436 if (TARGET_64BIT)
13438 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13439 rtx insns;
13441 start_sequence ();
13442 emit_call_insn
13443 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13444 insns = get_insns ();
13445 end_sequence ();
13447 if (GET_MODE (x) != Pmode)
13448 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13450 RTL_CONST_CALL_P (insns) = 1;
13451 emit_libcall_block (insns, dest, rax, x);
13453 else
13454 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13456 break;
13458 case TLS_MODEL_LOCAL_DYNAMIC:
13459 base = gen_reg_rtx (Pmode);
13461 if (!TARGET_64BIT)
13463 if (flag_pic)
13464 pic = pic_offset_table_rtx;
13465 else
13467 pic = gen_reg_rtx (Pmode);
13468 emit_insn (gen_set_got (pic));
13472 if (TARGET_GNU2_TLS)
13474 rtx tmp = ix86_tls_module_base ();
13476 if (TARGET_64BIT)
13477 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13478 else
13479 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13481 tp = get_thread_pointer (Pmode, true);
13482 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13483 gen_rtx_MINUS (Pmode, tmp, tp));
13485 else
13487 rtx caddr = ix86_tls_get_addr ();
13489 if (TARGET_64BIT)
13491 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13492 rtx insns, eqv;
13494 start_sequence ();
13495 emit_call_insn
13496 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13497 insns = get_insns ();
13498 end_sequence ();
13500 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13501 share the LD_BASE result with other LD model accesses. */
13502 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13503 UNSPEC_TLS_LD_BASE);
13505 RTL_CONST_CALL_P (insns) = 1;
13506 emit_libcall_block (insns, base, rax, eqv);
13508 else
13509 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13512 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13513 off = gen_rtx_CONST (Pmode, off);
13515 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13517 if (TARGET_GNU2_TLS)
13519 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13521 if (GET_MODE (x) != Pmode)
13522 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13524 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13526 break;
13528 case TLS_MODEL_INITIAL_EXEC:
13529 if (TARGET_64BIT)
13531 if (TARGET_SUN_TLS && !TARGET_X32)
13533 /* The Sun linker took the AMD64 TLS spec literally
13534 and can only handle %rax as destination of the
13535 initial executable code sequence. */
13537 dest = gen_reg_rtx (DImode);
13538 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13539 return dest;
13542 /* Generate DImode references to avoid %fs:(%reg32)
13543 problems and linker IE->LE relaxation bug. */
13544 tp_mode = DImode;
13545 pic = NULL;
13546 type = UNSPEC_GOTNTPOFF;
13548 else if (flag_pic)
13550 if (reload_in_progress)
13551 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13552 pic = pic_offset_table_rtx;
13553 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13555 else if (!TARGET_ANY_GNU_TLS)
13557 pic = gen_reg_rtx (Pmode);
13558 emit_insn (gen_set_got (pic));
13559 type = UNSPEC_GOTTPOFF;
13561 else
13563 pic = NULL;
13564 type = UNSPEC_INDNTPOFF;
13567 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13568 off = gen_rtx_CONST (tp_mode, off);
13569 if (pic)
13570 off = gen_rtx_PLUS (tp_mode, pic, off);
13571 off = gen_const_mem (tp_mode, off);
13572 set_mem_alias_set (off, ix86_GOT_alias_set ());
13574 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13576 base = get_thread_pointer (tp_mode,
13577 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13578 off = force_reg (tp_mode, off);
13579 return gen_rtx_PLUS (tp_mode, base, off);
13581 else
13583 base = get_thread_pointer (Pmode, true);
13584 dest = gen_reg_rtx (Pmode);
13585 emit_insn (ix86_gen_sub3 (dest, base, off));
13587 break;
13589 case TLS_MODEL_LOCAL_EXEC:
13590 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13591 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13592 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13593 off = gen_rtx_CONST (Pmode, off);
13595 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13597 base = get_thread_pointer (Pmode,
13598 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13599 return gen_rtx_PLUS (Pmode, base, off);
13601 else
13603 base = get_thread_pointer (Pmode, true);
13604 dest = gen_reg_rtx (Pmode);
13605 emit_insn (ix86_gen_sub3 (dest, base, off));
13607 break;
13609 default:
13610 gcc_unreachable ();
13613 return dest;
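/* Illustrative sketch (an annotation, not part of the port): for the simplest
   case above, a local-exec access under the GNU TLS dialect can end up as

     (plus (unspec [(const_int 0)] UNSPEC_TP)
           (const (unspec [(symbol_ref ("x"))] UNSPEC_NTPOFF)))

   i.e. the thread pointer plus a negative offset, which is later printed with
   the TLS segment override, e.g. x@tpoff relative to %fs on 64-bit or
   x@ntpoff relative to %gs on 32-bit.  */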
13616 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13617 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13618 unique refptr-DECL symbol corresponding to symbol DECL. */
13620 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13621 htab_t dllimport_map;
13623 static tree
13624 get_dllimport_decl (tree decl, bool beimport)
13626 struct tree_map *h, in;
13627 void **loc;
13628 const char *name;
13629 const char *prefix;
13630 size_t namelen, prefixlen;
13631 char *imp_name;
13632 tree to;
13633 rtx rtl;
13635 if (!dllimport_map)
13636 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13638 in.hash = htab_hash_pointer (decl);
13639 in.base.from = decl;
13640 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13641 h = (struct tree_map *) *loc;
13642 if (h)
13643 return h->to;
13645 *loc = h = ggc_alloc_tree_map ();
13646 h->hash = in.hash;
13647 h->base.from = decl;
13648 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13649 VAR_DECL, NULL, ptr_type_node);
13650 DECL_ARTIFICIAL (to) = 1;
13651 DECL_IGNORED_P (to) = 1;
13652 DECL_EXTERNAL (to) = 1;
13653 TREE_READONLY (to) = 1;
13655 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13656 name = targetm.strip_name_encoding (name);
13657 if (beimport)
13658 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13659 ? "*__imp_" : "*__imp__";
13660 else
13661 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13662 namelen = strlen (name);
13663 prefixlen = strlen (prefix);
13664 imp_name = (char *) alloca (namelen + prefixlen + 1);
13665 memcpy (imp_name, prefix, prefixlen);
13666 memcpy (imp_name + prefixlen, name, namelen + 1);
13668 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13669 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13670 SET_SYMBOL_REF_DECL (rtl, to);
13671 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13672 if (!beimport)
13674 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13675 #ifdef SUB_TARGET_RECORD_STUB
13676 SUB_TARGET_RECORD_STUB (name);
13677 #endif
13680 rtl = gen_const_mem (Pmode, rtl);
13681 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13683 SET_DECL_RTL (to, rtl);
13684 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13686 return to;
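/* Illustrative examples (an annotation, not part of the port): for a decl
   named "foo" the code above builds the assembler name "*__imp__foo"
   ("*__imp_foo" when there is no user label prefix or the name carries the
   fastcall prefix) in the dllimport case, and "*refptr.foo" ("*.refptr.foo"
   without a user label prefix) in the refptr case; the DECL_RTL of the new
   decl is then a constant memory reference through that symbol.  */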
13689 /* Expand SYMBOL into its corresponding far-address symbol.
13690 WANT_REG is true if we require the result to be a register. */
13692 static rtx
13693 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13695 tree imp_decl;
13696 rtx x;
13698 gcc_assert (SYMBOL_REF_DECL (symbol));
13699 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13701 x = DECL_RTL (imp_decl);
13702 if (want_reg)
13703 x = force_reg (Pmode, x);
13704 return x;
13707 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13708 true if we require the result be a register. */
13710 static rtx
13711 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13713 tree imp_decl;
13714 rtx x;
13716 gcc_assert (SYMBOL_REF_DECL (symbol));
13717 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13719 x = DECL_RTL (imp_decl);
13720 if (want_reg)
13721 x = force_reg (Pmode, x);
13722 return x;
13725 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13726 is true if we require the result be a register. */
13728 static rtx
13729 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13731 if (!TARGET_PECOFF)
13732 return NULL_RTX;
13734 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13736 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13737 return legitimize_dllimport_symbol (addr, inreg);
13738 if (GET_CODE (addr) == CONST
13739 && GET_CODE (XEXP (addr, 0)) == PLUS
13740 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13741 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13743 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13744 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13748 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13749 return NULL_RTX;
13750 if (GET_CODE (addr) == SYMBOL_REF
13751 && !is_imported_p (addr)
13752 && SYMBOL_REF_EXTERNAL_P (addr)
13753 && SYMBOL_REF_DECL (addr))
13754 return legitimize_pe_coff_extern_decl (addr, inreg);
13756 if (GET_CODE (addr) == CONST
13757 && GET_CODE (XEXP (addr, 0)) == PLUS
13758 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13759 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13760 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13761 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13763 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13764 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13766 return NULL_RTX;
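/* Illustrative example (an annotation, not part of the port): for a
   dllimport reference of the form (const (plus (symbol_ref ("foo"))
   (const_int 8))) the code above rewrites the symbol through its import
   stub, yielding roughly

     (plus (mem (symbol_ref imp)) (const_int 8))

   where imp is the __imp_-prefixed stub symbol whose DECL_RTL is built by
   get_dllimport_decl.  */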
13769 /* Try machine-dependent ways of modifying an illegitimate address
13770 to be legitimate. If we find one, return the new, valid address.
13771 This macro is used in only one place: `memory_address' in explow.c.
13773 OLDX is the address as it was before break_out_memory_refs was called.
13774 In some cases it is useful to look at this to decide what needs to be done.
13776 It is always safe for this macro to do nothing. It exists to recognize
13777 opportunities to optimize the output.
13779 For the 80386, we handle X+REG by loading X into a register R and
13780 using R+REG. R will go in a general reg and indexing will be used.
13781 However, if REG is a broken-out memory address or multiplication,
13782 nothing needs to be done because REG can certainly go in a general reg.
13784 When -fpic is used, special handling is needed for symbolic references.
13785 See comments by legitimize_pic_address in i386.c for details. */
13787 static rtx
13788 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13789 enum machine_mode mode)
13791 int changed = 0;
13792 unsigned log;
13794 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13795 if (log)
13796 return legitimize_tls_address (x, (enum tls_model) log, false);
13797 if (GET_CODE (x) == CONST
13798 && GET_CODE (XEXP (x, 0)) == PLUS
13799 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13800 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13802 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13803 (enum tls_model) log, false);
13804 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13807 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13809 rtx tmp = legitimize_pe_coff_symbol (x, true);
13810 if (tmp)
13811 return tmp;
13814 if (flag_pic && SYMBOLIC_CONST (x))
13815 return legitimize_pic_address (x, 0);
13817 #if TARGET_MACHO
13818 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13819 return machopic_indirect_data_reference (x, 0);
13820 #endif
13822 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13823 if (GET_CODE (x) == ASHIFT
13824 && CONST_INT_P (XEXP (x, 1))
13825 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13827 changed = 1;
13828 log = INTVAL (XEXP (x, 1));
13829 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13830 GEN_INT (1 << log));
13833 if (GET_CODE (x) == PLUS)
13835 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13837 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13838 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13839 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13841 changed = 1;
13842 log = INTVAL (XEXP (XEXP (x, 0), 1));
13843 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13844 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13845 GEN_INT (1 << log));
13848 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13849 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13850 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13852 changed = 1;
13853 log = INTVAL (XEXP (XEXP (x, 1), 1));
13854 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13855 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13856 GEN_INT (1 << log));
13859 /* Put multiply first if it isn't already. */
13860 if (GET_CODE (XEXP (x, 1)) == MULT)
13862 rtx tmp = XEXP (x, 0);
13863 XEXP (x, 0) = XEXP (x, 1);
13864 XEXP (x, 1) = tmp;
13865 changed = 1;
13868 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13869 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13870 created by virtual register instantiation, register elimination, and
13871 similar optimizations. */
13872 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13874 changed = 1;
13875 x = gen_rtx_PLUS (Pmode,
13876 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13877 XEXP (XEXP (x, 1), 0)),
13878 XEXP (XEXP (x, 1), 1));
13881 /* Canonicalize
13882 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13883 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13884 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13885 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13886 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13887 && CONSTANT_P (XEXP (x, 1)))
13889 rtx constant;
13890 rtx other = NULL_RTX;
13892 if (CONST_INT_P (XEXP (x, 1)))
13894 constant = XEXP (x, 1);
13895 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13897 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13899 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13900 other = XEXP (x, 1);
13902 else
13903 constant = 0;
13905 if (constant)
13907 changed = 1;
13908 x = gen_rtx_PLUS (Pmode,
13909 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13910 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13911 plus_constant (Pmode, other,
13912 INTVAL (constant)));
13916 if (changed && ix86_legitimate_address_p (mode, x, false))
13917 return x;
13919 if (GET_CODE (XEXP (x, 0)) == MULT)
13921 changed = 1;
13922 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13925 if (GET_CODE (XEXP (x, 1)) == MULT)
13927 changed = 1;
13928 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13931 if (changed
13932 && REG_P (XEXP (x, 1))
13933 && REG_P (XEXP (x, 0)))
13934 return x;
13936 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13938 changed = 1;
13939 x = legitimize_pic_address (x, 0);
13942 if (changed && ix86_legitimate_address_p (mode, x, false))
13943 return x;
13945 if (REG_P (XEXP (x, 0)))
13947 rtx temp = gen_reg_rtx (Pmode);
13948 rtx val = force_operand (XEXP (x, 1), temp);
13949 if (val != temp)
13951 val = convert_to_mode (Pmode, val, 1);
13952 emit_move_insn (temp, val);
13955 XEXP (x, 1) = temp;
13956 return x;
13959 else if (REG_P (XEXP (x, 1)))
13961 rtx temp = gen_reg_rtx (Pmode);
13962 rtx val = force_operand (XEXP (x, 0), temp);
13963 if (val != temp)
13965 val = convert_to_mode (Pmode, val, 1);
13966 emit_move_insn (temp, val);
13969 XEXP (x, 0) = temp;
13970 return x;
13974 return x;
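/* Illustrative example (an annotation, not part of the port): the shift
   canonicalization above turns an address like

     (plus (ashift (reg) (const_int 2)) (reg))

   into

     (plus (mult (reg) (const_int 4)) (reg))

   so that it matches the base + index*scale form expected by
   ix86_decompose_address and can be emitted as a single lea or memory
   operand.  */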
13977 /* Print an integer constant expression in assembler syntax. Addition
13978 and subtraction are the only arithmetic that may appear in these
13979 expressions. FILE is the stdio stream to write to, X is the rtx, and
13980 CODE is the operand print code from the output string. */
13982 static void
13983 output_pic_addr_const (FILE *file, rtx x, int code)
13985 char buf[256];
13987 switch (GET_CODE (x))
13989 case PC:
13990 gcc_assert (flag_pic);
13991 putc ('.', file);
13992 break;
13994 case SYMBOL_REF:
13995 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13996 output_addr_const (file, x);
13997 else
13999 const char *name = XSTR (x, 0);
14001 /* Mark the decl as referenced so that cgraph will
14002 output the function. */
14003 if (SYMBOL_REF_DECL (x))
14004 mark_decl_referenced (SYMBOL_REF_DECL (x));
14006 #if TARGET_MACHO
14007 if (MACHOPIC_INDIRECT
14008 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14009 name = machopic_indirection_name (x, /*stub_p=*/true);
14010 #endif
14011 assemble_name (file, name);
14013 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14014 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14015 fputs ("@PLT", file);
14016 break;
14018 case LABEL_REF:
14019 x = XEXP (x, 0);
14020 /* FALLTHRU */
14021 case CODE_LABEL:
14022 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14023 assemble_name (asm_out_file, buf);
14024 break;
14026 case CONST_INT:
14027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14028 break;
14030 case CONST:
14031 /* This used to output parentheses around the expression,
14032 but that does not work on the 386 (either ATT or BSD assembler). */
14033 output_pic_addr_const (file, XEXP (x, 0), code);
14034 break;
14036 case CONST_DOUBLE:
14037 if (GET_MODE (x) == VOIDmode)
14039 /* We can use %d if the number is <32 bits and positive. */
14040 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14041 fprintf (file, "0x%lx%08lx",
14042 (unsigned long) CONST_DOUBLE_HIGH (x),
14043 (unsigned long) CONST_DOUBLE_LOW (x));
14044 else
14045 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14047 else
14048 /* We can't handle floating point constants;
14049 TARGET_PRINT_OPERAND must handle them. */
14050 output_operand_lossage ("floating constant misused");
14051 break;
14053 case PLUS:
14054 /* Some assemblers need integer constants to appear first. */
14055 if (CONST_INT_P (XEXP (x, 0)))
14057 output_pic_addr_const (file, XEXP (x, 0), code);
14058 putc ('+', file);
14059 output_pic_addr_const (file, XEXP (x, 1), code);
14061 else
14063 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14064 output_pic_addr_const (file, XEXP (x, 1), code);
14065 putc ('+', file);
14066 output_pic_addr_const (file, XEXP (x, 0), code);
14068 break;
14070 case MINUS:
14071 if (!TARGET_MACHO)
14072 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14073 output_pic_addr_const (file, XEXP (x, 0), code);
14074 putc ('-', file);
14075 output_pic_addr_const (file, XEXP (x, 1), code);
14076 if (!TARGET_MACHO)
14077 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14078 break;
14080 case UNSPEC:
14081 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14083 bool f = i386_asm_output_addr_const_extra (file, x);
14084 gcc_assert (f);
14085 break;
14088 gcc_assert (XVECLEN (x, 0) == 1);
14089 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14090 switch (XINT (x, 1))
14092 case UNSPEC_GOT:
14093 fputs ("@GOT", file);
14094 break;
14095 case UNSPEC_GOTOFF:
14096 fputs ("@GOTOFF", file);
14097 break;
14098 case UNSPEC_PLTOFF:
14099 fputs ("@PLTOFF", file);
14100 break;
14101 case UNSPEC_PCREL:
14102 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14103 "(%rip)" : "[rip]", file);
14104 break;
14105 case UNSPEC_GOTPCREL:
14106 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14107 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14108 break;
14109 case UNSPEC_GOTTPOFF:
14110 /* FIXME: This might be @TPOFF in Sun ld too. */
14111 fputs ("@gottpoff", file);
14112 break;
14113 case UNSPEC_TPOFF:
14114 fputs ("@tpoff", file);
14115 break;
14116 case UNSPEC_NTPOFF:
14117 if (TARGET_64BIT)
14118 fputs ("@tpoff", file);
14119 else
14120 fputs ("@ntpoff", file);
14121 break;
14122 case UNSPEC_DTPOFF:
14123 fputs ("@dtpoff", file);
14124 break;
14125 case UNSPEC_GOTNTPOFF:
14126 if (TARGET_64BIT)
14127 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14128 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14129 else
14130 fputs ("@gotntpoff", file);
14131 break;
14132 case UNSPEC_INDNTPOFF:
14133 fputs ("@indntpoff", file);
14134 break;
14135 #if TARGET_MACHO
14136 case UNSPEC_MACHOPIC_OFFSET:
14137 putc ('-', file);
14138 machopic_output_function_base_name (file);
14139 break;
14140 #endif
14141 default:
14142 output_operand_lossage ("invalid UNSPEC as operand");
14143 break;
14145 break;
14147 default:
14148 output_operand_lossage ("invalid expression as operand");
14152 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14153 We need to emit DTP-relative relocations. */
14155 static void ATTRIBUTE_UNUSED
14156 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14158 fputs (ASM_LONG, file);
14159 output_addr_const (file, x);
14160 fputs ("@dtpoff", file);
14161 switch (size)
14163 case 4:
14164 break;
14165 case 8:
14166 fputs (", 0", file);
14167 break;
14168 default:
14169 gcc_unreachable ();
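/* Illustrative example (an annotation, not part of the port): assuming
   ASM_LONG expands to a ".long" directive, a 4-byte DTP-relative reference
   to symbol "x" comes out as

     .long x@dtpoff

   while the 8-byte case appends ", 0" for the upper half:

     .long x@dtpoff, 0  */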
14173 /* Return true if X is a representation of the PIC register. This copes
14174 with calls from ix86_find_base_term, where the register might have
14175 been replaced by a cselib value. */
14177 static bool
14178 ix86_pic_register_p (rtx x)
14180 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14181 return (pic_offset_table_rtx
14182 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14183 else
14184 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14187 /* Helper function for ix86_delegitimize_address.
14188 Attempt to delegitimize TLS local-exec accesses. */
14190 static rtx
14191 ix86_delegitimize_tls_address (rtx orig_x)
14193 rtx x = orig_x, unspec;
14194 struct ix86_address addr;
14196 if (!TARGET_TLS_DIRECT_SEG_REFS)
14197 return orig_x;
14198 if (MEM_P (x))
14199 x = XEXP (x, 0);
14200 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14201 return orig_x;
14202 if (ix86_decompose_address (x, &addr) == 0
14203 || addr.seg != DEFAULT_TLS_SEG_REG
14204 || addr.disp == NULL_RTX
14205 || GET_CODE (addr.disp) != CONST)
14206 return orig_x;
14207 unspec = XEXP (addr.disp, 0);
14208 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14209 unspec = XEXP (unspec, 0);
14210 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14211 return orig_x;
14212 x = XVECEXP (unspec, 0, 0);
14213 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14214 if (unspec != XEXP (addr.disp, 0))
14215 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14216 if (addr.index)
14218 rtx idx = addr.index;
14219 if (addr.scale != 1)
14220 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14221 x = gen_rtx_PLUS (Pmode, idx, x);
14223 if (addr.base)
14224 x = gen_rtx_PLUS (Pmode, addr.base, x);
14225 if (MEM_P (orig_x))
14226 x = replace_equiv_address_nv (orig_x, x);
14227 return x;
14230 /* In the name of slightly smaller debug output, and to cater to
14231 general assembler lossage, recognize PIC+GOTOFF and turn it back
14232 into a direct symbol reference.
14234 On Darwin, this is necessary to avoid a crash, because Darwin
14235 has a different PIC label for each routine but the DWARF debugging
14236 information is not associated with any particular routine, so it's
14237 necessary to remove references to the PIC label from RTL stored by
14238 the DWARF output code. */
14240 static rtx
14241 ix86_delegitimize_address (rtx x)
14243 rtx orig_x = delegitimize_mem_from_attrs (x);
14244 /* addend is NULL or some rtx if x is something+GOTOFF where
14245 something doesn't include the PIC register. */
14246 rtx addend = NULL_RTX;
14247 /* reg_addend is NULL or a multiple of some register. */
14248 rtx reg_addend = NULL_RTX;
14249 /* const_addend is NULL or a const_int. */
14250 rtx const_addend = NULL_RTX;
14251 /* This is the result, or NULL. */
14252 rtx result = NULL_RTX;
14254 x = orig_x;
14256 if (MEM_P (x))
14257 x = XEXP (x, 0);
14259 if (TARGET_64BIT)
14261 if (GET_CODE (x) == CONST
14262 && GET_CODE (XEXP (x, 0)) == PLUS
14263 && GET_MODE (XEXP (x, 0)) == Pmode
14264 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14265 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14266 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14268 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14269 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14270 if (MEM_P (orig_x))
14271 x = replace_equiv_address_nv (orig_x, x);
14272 return x;
14275 if (GET_CODE (x) == CONST
14276 && GET_CODE (XEXP (x, 0)) == UNSPEC
14277 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14278 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14279 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14281 x = XVECEXP (XEXP (x, 0), 0, 0);
14282 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14284 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14285 GET_MODE (x), 0);
14286 if (x == NULL_RTX)
14287 return orig_x;
14289 return x;
14292 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14293 return ix86_delegitimize_tls_address (orig_x);
14295 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14296 and -mcmodel=medium -fpic. */
14299 if (GET_CODE (x) != PLUS
14300 || GET_CODE (XEXP (x, 1)) != CONST)
14301 return ix86_delegitimize_tls_address (orig_x);
14303 if (ix86_pic_register_p (XEXP (x, 0)))
14304 /* %ebx + GOT/GOTOFF */
14306 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14308 /* %ebx + %reg * scale + GOT/GOTOFF */
14309 reg_addend = XEXP (x, 0);
14310 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14311 reg_addend = XEXP (reg_addend, 1);
14312 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14313 reg_addend = XEXP (reg_addend, 0);
14314 else
14316 reg_addend = NULL_RTX;
14317 addend = XEXP (x, 0);
14320 else
14321 addend = XEXP (x, 0);
14323 x = XEXP (XEXP (x, 1), 0);
14324 if (GET_CODE (x) == PLUS
14325 && CONST_INT_P (XEXP (x, 1)))
14327 const_addend = XEXP (x, 1);
14328 x = XEXP (x, 0);
14331 if (GET_CODE (x) == UNSPEC
14332 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14333 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14334 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14335 && !MEM_P (orig_x) && !addend)))
14336 result = XVECEXP (x, 0, 0);
14338 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14339 && !MEM_P (orig_x))
14340 result = XVECEXP (x, 0, 0);
14342 if (! result)
14343 return ix86_delegitimize_tls_address (orig_x);
14345 if (const_addend)
14346 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14347 if (reg_addend)
14348 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14349 if (addend)
14351 /* If the rest of original X doesn't involve the PIC register, add
14352 addend and subtract pic_offset_table_rtx. This can happen e.g.
14353 for code like:
14354 leal (%ebx, %ecx, 4), %ecx
14356 movl foo@GOTOFF(%ecx), %edx
14357 in which case we return (%ecx - %ebx) + foo. */
14358 if (pic_offset_table_rtx)
14359 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14360 pic_offset_table_rtx),
14361 result);
14362 else
14363 return orig_x;
14365 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14367 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14368 if (result == NULL_RTX)
14369 return orig_x;
14371 return result;
14374 /* If X is a machine specific address (i.e. a symbol or label being
14375 referenced as a displacement from the GOT implemented using an
14376 UNSPEC), then return the base term. Otherwise return X. */
14379 ix86_find_base_term (rtx x)
14381 rtx term;
14383 if (TARGET_64BIT)
14385 if (GET_CODE (x) != CONST)
14386 return x;
14387 term = XEXP (x, 0);
14388 if (GET_CODE (term) == PLUS
14389 && (CONST_INT_P (XEXP (term, 1))
14390 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14391 term = XEXP (term, 0);
14392 if (GET_CODE (term) != UNSPEC
14393 || (XINT (term, 1) != UNSPEC_GOTPCREL
14394 && XINT (term, 1) != UNSPEC_PCREL))
14395 return x;
14397 return XVECEXP (term, 0, 0);
14400 return ix86_delegitimize_address (x);
14403 static void
14404 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14405 bool fp, FILE *file)
14407 const char *suffix;
14409 if (mode == CCFPmode || mode == CCFPUmode)
14411 code = ix86_fp_compare_code_to_integer (code);
14412 mode = CCmode;
14414 if (reverse)
14415 code = reverse_condition (code);
14417 switch (code)
14419 case EQ:
14420 switch (mode)
14422 case CCAmode:
14423 suffix = "a";
14424 break;
14426 case CCCmode:
14427 suffix = "c";
14428 break;
14430 case CCOmode:
14431 suffix = "o";
14432 break;
14434 case CCSmode:
14435 suffix = "s";
14436 break;
14438 default:
14439 suffix = "e";
14441 break;
14442 case NE:
14443 switch (mode)
14445 case CCAmode:
14446 suffix = "na";
14447 break;
14449 case CCCmode:
14450 suffix = "nc";
14451 break;
14453 case CCOmode:
14454 suffix = "no";
14455 break;
14457 case CCSmode:
14458 suffix = "ns";
14459 break;
14461 default:
14462 suffix = "ne";
14464 break;
14465 case GT:
14466 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14467 suffix = "g";
14468 break;
14469 case GTU:
14470 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14471 Those same assemblers have the same but opposite lossage on cmov. */
14472 if (mode == CCmode)
14473 suffix = fp ? "nbe" : "a";
14474 else
14475 gcc_unreachable ();
14476 break;
14477 case LT:
14478 switch (mode)
14480 case CCNOmode:
14481 case CCGOCmode:
14482 suffix = "s";
14483 break;
14485 case CCmode:
14486 case CCGCmode:
14487 suffix = "l";
14488 break;
14490 default:
14491 gcc_unreachable ();
14493 break;
14494 case LTU:
14495 if (mode == CCmode)
14496 suffix = "b";
14497 else if (mode == CCCmode)
14498 suffix = "c";
14499 else
14500 gcc_unreachable ();
14501 break;
14502 case GE:
14503 switch (mode)
14505 case CCNOmode:
14506 case CCGOCmode:
14507 suffix = "ns";
14508 break;
14510 case CCmode:
14511 case CCGCmode:
14512 suffix = "ge";
14513 break;
14515 default:
14516 gcc_unreachable ();
14518 break;
14519 case GEU:
14520 if (mode == CCmode)
14521 suffix = fp ? "nb" : "ae";
14522 else if (mode == CCCmode)
14523 suffix = "nc";
14524 else
14525 gcc_unreachable ();
14526 break;
14527 case LE:
14528 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14529 suffix = "le";
14530 break;
14531 case LEU:
14532 if (mode == CCmode)
14533 suffix = "be";
14534 else
14535 gcc_unreachable ();
14536 break;
14537 case UNORDERED:
14538 suffix = fp ? "u" : "p";
14539 break;
14540 case ORDERED:
14541 suffix = fp ? "nu" : "np";
14542 break;
14543 default:
14544 gcc_unreachable ();
14546 fputs (suffix, file);
14549 /* Print the name of register X to FILE based on its machine mode and number.
14550 If CODE is 'w', pretend the mode is HImode.
14551 If CODE is 'b', pretend the mode is QImode.
14552 If CODE is 'k', pretend the mode is SImode.
14553 If CODE is 'q', pretend the mode is DImode.
14554 If CODE is 'x', pretend the mode is V4SFmode.
14555 If CODE is 't', pretend the mode is V8SFmode.
14556 If CODE is 'g', pretend the mode is V16SFmode.
14557 If CODE is 'h', pretend the reg is the 'high' byte register.
14558 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14559 If CODE is 'd', duplicate the operand for an AVX instruction.
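For example, code 'b' applied to the ax register prints "al" ("%al" in
   AT&T syntax), and code 'h' prints "ah".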
14562 void
14563 print_reg (rtx x, int code, FILE *file)
14565 const char *reg;
14566 unsigned int regno;
14567 bool duplicated = code == 'd' && TARGET_AVX;
14569 if (ASSEMBLER_DIALECT == ASM_ATT)
14570 putc ('%', file);
14572 if (x == pc_rtx)
14574 gcc_assert (TARGET_64BIT);
14575 fputs ("rip", file);
14576 return;
14579 regno = true_regnum (x);
14580 gcc_assert (regno != ARG_POINTER_REGNUM
14581 && regno != FRAME_POINTER_REGNUM
14582 && regno != FLAGS_REG
14583 && regno != FPSR_REG
14584 && regno != FPCR_REG);
14586 if (code == 'w' || MMX_REG_P (x))
14587 code = 2;
14588 else if (code == 'b')
14589 code = 1;
14590 else if (code == 'k')
14591 code = 4;
14592 else if (code == 'q')
14593 code = 8;
14594 else if (code == 'y')
14595 code = 3;
14596 else if (code == 'h')
14597 code = 0;
14598 else if (code == 'x')
14599 code = 16;
14600 else if (code == 't')
14601 code = 32;
14602 else if (code == 'g')
14603 code = 64;
14604 else
14605 code = GET_MODE_SIZE (GET_MODE (x));
14607 /* Irritatingly, AMD extended registers use a different naming convention
14608 from the normal registers: "r%d[bwd]". */
14609 if (REX_INT_REGNO_P (regno))
14611 gcc_assert (TARGET_64BIT);
14612 putc ('r', file);
14613 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14614 switch (code)
14616 case 0:
14617 error ("extended registers have no high halves");
14618 break;
14619 case 1:
14620 putc ('b', file);
14621 break;
14622 case 2:
14623 putc ('w', file);
14624 break;
14625 case 4:
14626 putc ('d', file);
14627 break;
14628 case 8:
14629 /* no suffix */
14630 break;
14631 default:
14632 error ("unsupported operand size for extended register");
14633 break;
14635 return;
14638 reg = NULL;
14639 switch (code)
14641 case 3:
14642 if (STACK_TOP_P (x))
14644 reg = "st(0)";
14645 break;
14647 /* FALLTHRU */
14648 case 8:
14649 case 4:
14650 case 12:
14651 if (! ANY_FP_REG_P (x))
14652 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14653 /* FALLTHRU */
14654 case 16:
14655 case 2:
14656 normal:
14657 reg = hi_reg_name[regno];
14658 break;
14659 case 1:
14660 if (regno >= ARRAY_SIZE (qi_reg_name))
14661 goto normal;
14662 reg = qi_reg_name[regno];
14663 break;
14664 case 0:
14665 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14666 goto normal;
14667 reg = qi_high_reg_name[regno];
14668 break;
14669 case 32:
14670 if (SSE_REG_P (x))
14672 gcc_assert (!duplicated);
14673 putc ('y', file);
14674 fputs (hi_reg_name[regno] + 1, file);
14675 return;
14677 case 64:
14678 if (SSE_REG_P (x))
14680 gcc_assert (!duplicated);
14681 putc ('z', file);
14682 fputs (hi_reg_name[REGNO (x)] + 1, file);
14683 return;
14685 break;
14686 default:
14687 gcc_unreachable ();
14690 fputs (reg, file);
14691 if (duplicated)
14693 if (ASSEMBLER_DIALECT == ASM_ATT)
14694 fprintf (file, ", %%%s", reg);
14695 else
14696 fprintf (file, ", %s", reg);
14700 /* Locate some local-dynamic symbol still in use by this function
14701 so that we can print its name in some tls_local_dynamic_base
14702 pattern. */
14704 static int
14705 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14707 rtx x = *px;
14709 if (GET_CODE (x) == SYMBOL_REF
14710 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14712 cfun->machine->some_ld_name = XSTR (x, 0);
14713 return 1;
14716 return 0;
14719 static const char *
14720 get_some_local_dynamic_name (void)
14722 rtx insn;
14724 if (cfun->machine->some_ld_name)
14725 return cfun->machine->some_ld_name;
14727 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14728 if (NONDEBUG_INSN_P (insn)
14729 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14730 return cfun->machine->some_ld_name;
14732 return NULL;
14735 /* Meaning of CODE:
14736 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14737 C -- print opcode suffix for set/cmov insn.
14738 c -- like C, but print reversed condition
14739 F,f -- likewise, but for floating-point.
14740 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14741 otherwise nothing
14742 R -- print embedded rounding and sae.
14743 r -- print only sae.
14744 z -- print the opcode suffix for the size of the current operand.
14745 Z -- likewise, with special suffixes for x87 instructions.
14746 * -- print a star (in certain assembler syntax)
14747 A -- print an absolute memory reference.
14748 E -- print address with DImode register names if TARGET_64BIT.
14749 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14750 s -- print a shift double count, followed by the assembler's argument
14751 delimiter.
14752 b -- print the QImode name of the register for the indicated operand.
14753 %b0 would print %al if operands[0] is reg 0.
14754 w -- likewise, print the HImode name of the register.
14755 k -- likewise, print the SImode name of the register.
14756 q -- likewise, print the DImode name of the register.
14757 x -- likewise, print the V4SFmode name of the register.
14758 t -- likewise, print the V8SFmode name of the register.
14759 g -- likewise, print the V16SFmode name of the register.
14760 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14761 y -- print "st(0)" instead of "st" as a register.
14762 d -- print duplicated register operand for AVX instruction.
14763 D -- print condition for SSE cmp instruction.
14764 P -- if PIC, print an @PLT suffix.
14765 p -- print raw symbol name.
14766 X -- don't print any sort of PIC '@' suffix for a symbol.
14767 & -- print some in-use local-dynamic symbol name.
14768 H -- print a memory address offset by 8; used for sse high-parts
14769 Y -- print condition for XOP pcom* instruction.
14770 + -- print a branch hint as 'cs' or 'ds' prefix
14771 ; -- print a semicolon (after prefixes due to bug in older gas).
14772 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14773 @ -- print a segment register of thread base pointer load
14774 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
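For example, "%z0" on an SImode operand prints the 'l' size suffix in
   AT&T syntax (nothing in Intel syntax), and "%k1" prints the SImode
   name of the register in operands[1], e.g. "eax".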
14777 void
14778 ix86_print_operand (FILE *file, rtx x, int code)
14780 if (code)
14782 switch (code)
14784 case 'A':
14785 switch (ASSEMBLER_DIALECT)
14787 case ASM_ATT:
14788 putc ('*', file);
14789 break;
14791 case ASM_INTEL:
14792 /* Intel syntax. For absolute addresses, registers should not
14793 be surrounded by brackets. */
14794 if (!REG_P (x))
14796 putc ('[', file);
14797 ix86_print_operand (file, x, 0);
14798 putc (']', file);
14799 return;
14801 break;
14803 default:
14804 gcc_unreachable ();
14807 ix86_print_operand (file, x, 0);
14808 return;
14810 case 'E':
14811 /* Wrap address in an UNSPEC to declare special handling. */
14812 if (TARGET_64BIT)
14813 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14815 output_address (x);
14816 return;
14818 case 'L':
14819 if (ASSEMBLER_DIALECT == ASM_ATT)
14820 putc ('l', file);
14821 return;
14823 case 'W':
14824 if (ASSEMBLER_DIALECT == ASM_ATT)
14825 putc ('w', file);
14826 return;
14828 case 'B':
14829 if (ASSEMBLER_DIALECT == ASM_ATT)
14830 putc ('b', file);
14831 return;
14833 case 'Q':
14834 if (ASSEMBLER_DIALECT == ASM_ATT)
14835 putc ('l', file);
14836 return;
14838 case 'S':
14839 if (ASSEMBLER_DIALECT == ASM_ATT)
14840 putc ('s', file);
14841 return;
14843 case 'T':
14844 if (ASSEMBLER_DIALECT == ASM_ATT)
14845 putc ('t', file);
14846 return;
14848 case 'O':
14849 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14850 if (ASSEMBLER_DIALECT != ASM_ATT)
14851 return;
14853 switch (GET_MODE_SIZE (GET_MODE (x)))
14855 case 2:
14856 putc ('w', file);
14857 break;
14859 case 4:
14860 putc ('l', file);
14861 break;
14863 case 8:
14864 putc ('q', file);
14865 break;
14867 default:
14868 output_operand_lossage
14869 ("invalid operand size for operand code 'O'");
14870 return;
14873 putc ('.', file);
14874 #endif
14875 return;
14877 case 'z':
14878 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14880 /* Opcodes don't get size suffixes if using Intel syntax. */
14881 if (ASSEMBLER_DIALECT == ASM_INTEL)
14882 return;
14884 switch (GET_MODE_SIZE (GET_MODE (x)))
14886 case 1:
14887 putc ('b', file);
14888 return;
14890 case 2:
14891 putc ('w', file);
14892 return;
14894 case 4:
14895 putc ('l', file);
14896 return;
14898 case 8:
14899 putc ('q', file);
14900 return;
14902 default:
14903 output_operand_lossage
14904 ("invalid operand size for operand code 'z'");
14905 return;
14909 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14910 warning
14911 (0, "non-integer operand used with operand code 'z'");
14912 /* FALLTHRU */
14914 case 'Z':
14915 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14916 if (ASSEMBLER_DIALECT == ASM_INTEL)
14917 return;
14919 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14921 switch (GET_MODE_SIZE (GET_MODE (x)))
14923 case 2:
14924 #ifdef HAVE_AS_IX86_FILDS
14925 putc ('s', file);
14926 #endif
14927 return;
14929 case 4:
14930 putc ('l', file);
14931 return;
14933 case 8:
14934 #ifdef HAVE_AS_IX86_FILDQ
14935 putc ('q', file);
14936 #else
14937 fputs ("ll", file);
14938 #endif
14939 return;
14941 default:
14942 break;
14945 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14947 /* 387 opcodes don't get size suffixes
14948 if the operands are registers. */
14949 if (STACK_REG_P (x))
14950 return;
14952 switch (GET_MODE_SIZE (GET_MODE (x)))
14954 case 4:
14955 putc ('s', file);
14956 return;
14958 case 8:
14959 putc ('l', file);
14960 return;
14962 case 12:
14963 case 16:
14964 putc ('t', file);
14965 return;
14967 default:
14968 break;
14971 else
14973 output_operand_lossage
14974 ("invalid operand type used with operand code 'Z'");
14975 return;
14978 output_operand_lossage
14979 ("invalid operand size for operand code 'Z'");
14980 return;
14982 case 'd':
14983 case 'b':
14984 case 'w':
14985 case 'k':
14986 case 'q':
14987 case 'h':
14988 case 't':
14989 case 'g':
14990 case 'y':
14991 case 'x':
14992 case 'X':
14993 case 'P':
14994 case 'p':
14995 break;
14997 case 's':
14998 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15000 ix86_print_operand (file, x, 0);
15001 fputs (", ", file);
15003 return;
15005 case 'Y':
15006 switch (GET_CODE (x))
15008 case NE:
15009 fputs ("neq", file);
15010 break;
15011 case EQ:
15012 fputs ("eq", file);
15013 break;
15014 case GE:
15015 case GEU:
15016 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15017 break;
15018 case GT:
15019 case GTU:
15020 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15021 break;
15022 case LE:
15023 case LEU:
15024 fputs ("le", file);
15025 break;
15026 case LT:
15027 case LTU:
15028 fputs ("lt", file);
15029 break;
15030 case UNORDERED:
15031 fputs ("unord", file);
15032 break;
15033 case ORDERED:
15034 fputs ("ord", file);
15035 break;
15036 case UNEQ:
15037 fputs ("ueq", file);
15038 break;
15039 case UNGE:
15040 fputs ("nlt", file);
15041 break;
15042 case UNGT:
15043 fputs ("nle", file);
15044 break;
15045 case UNLE:
15046 fputs ("ule", file);
15047 break;
15048 case UNLT:
15049 fputs ("ult", file);
15050 break;
15051 case LTGT:
15052 fputs ("une", file);
15053 break;
15054 default:
15055 output_operand_lossage ("operand is not a condition code, "
15056 "invalid operand code 'Y'");
15057 return;
15059 return;
15061 case 'D':
15062 /* A little bit of braindamage here. The SSE compare instructions
15063 use completely different names for the comparisons than the
15064 fp conditional moves do. */
15065 switch (GET_CODE (x))
15067 case UNEQ:
15068 if (TARGET_AVX)
15070 fputs ("eq_us", file);
15071 break;
15073 case EQ:
15074 fputs ("eq", file);
15075 break;
15076 case UNLT:
15077 if (TARGET_AVX)
15079 fputs ("nge", file);
15080 break;
15082 case LT:
15083 fputs ("lt", file);
15084 break;
15085 case UNLE:
15086 if (TARGET_AVX)
15088 fputs ("ngt", file);
15089 break;
15091 case LE:
15092 fputs ("le", file);
15093 break;
15094 case UNORDERED:
15095 fputs ("unord", file);
15096 break;
15097 case LTGT:
15098 if (TARGET_AVX)
15100 fputs ("neq_oq", file);
15101 break;
15103 case NE:
15104 fputs ("neq", file);
15105 break;
15106 case GE:
15107 if (TARGET_AVX)
15109 fputs ("ge", file);
15110 break;
15112 case UNGE:
15113 fputs ("nlt", file);
15114 break;
15115 case GT:
15116 if (TARGET_AVX)
15118 fputs ("gt", file);
15119 break;
15121 case UNGT:
15122 fputs ("nle", file);
15123 break;
15124 case ORDERED:
15125 fputs ("ord", file);
15126 break;
15127 default:
15128 output_operand_lossage ("operand is not a condition code, "
15129 "invalid operand code 'D'");
15130 return;
15132 return;
15134 case 'F':
15135 case 'f':
15136 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15137 if (ASSEMBLER_DIALECT == ASM_ATT)
15138 putc ('.', file);
15139 #endif
15141 case 'C':
15142 case 'c':
15143 if (!COMPARISON_P (x))
15145 output_operand_lossage ("operand is not a condition code, "
15146 "invalid operand code '%c'", code);
15147 return;
15149 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15150 code == 'c' || code == 'f',
15151 code == 'F' || code == 'f',
15152 file);
15153 return;
15155 case 'H':
15156 if (!offsettable_memref_p (x))
15158 output_operand_lossage ("operand is not an offsettable memory "
15159 "reference, invalid operand code 'H'");
15160 return;
15162 /* It doesn't actually matter what mode we use here, as we're
15163 only going to use this for printing. */
15164 x = adjust_address_nv (x, DImode, 8);
15165 /* Output 'qword ptr' for intel assembler dialect. */
15166 if (ASSEMBLER_DIALECT == ASM_INTEL)
15167 code = 'q';
15168 break;
15170 case 'K':
15171 gcc_assert (CONST_INT_P (x));
15173 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15174 #ifdef HAVE_AS_IX86_HLE
15175 fputs ("xacquire ", file);
15176 #else
15177 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15178 #endif
15179 else if (INTVAL (x) & IX86_HLE_RELEASE)
15180 #ifdef HAVE_AS_IX86_HLE
15181 fputs ("xrelease ", file);
15182 #else
15183 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15184 #endif
15185 /* We do not want to print the value of the operand. */
15186 return;
15188 case 'N':
15189 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15190 fputs ("{z}", file);
15191 return;
15193 case 'r':
15194 gcc_assert (CONST_INT_P (x));
15195 gcc_assert (INTVAL (x) == ROUND_SAE);
15197 if (ASSEMBLER_DIALECT == ASM_INTEL)
15198 fputs (", ", file);
15200 fputs ("{sae}", file);
15202 if (ASSEMBLER_DIALECT == ASM_ATT)
15203 fputs (", ", file);
15205 return;
15207 case 'R':
15208 gcc_assert (CONST_INT_P (x));
15210 if (ASSEMBLER_DIALECT == ASM_INTEL)
15211 fputs (", ", file);
15213 switch (INTVAL (x))
15215 case ROUND_NEAREST_INT | ROUND_SAE:
15216 fputs ("{rn-sae}", file);
15217 break;
15218 case ROUND_NEG_INF | ROUND_SAE:
15219 fputs ("{rd-sae}", file);
15220 break;
15221 case ROUND_POS_INF | ROUND_SAE:
15222 fputs ("{ru-sae}", file);
15223 break;
15224 case ROUND_ZERO | ROUND_SAE:
15225 fputs ("{rz-sae}", file);
15226 break;
15227 default:
15228 gcc_unreachable ();
15231 if (ASSEMBLER_DIALECT == ASM_ATT)
15232 fputs (", ", file);
15234 return;
15236 case '*':
15237 if (ASSEMBLER_DIALECT == ASM_ATT)
15238 putc ('*', file);
15239 return;
15241 case '&':
15243 const char *name = get_some_local_dynamic_name ();
15244 if (name == NULL)
15245 output_operand_lossage ("'%%&' used without any "
15246 "local dynamic TLS references");
15247 else
15248 assemble_name (file, name);
15249 return;
15252 case '+':
15254 rtx x;
15256 if (!optimize
15257 || optimize_function_for_size_p (cfun)
15258 || !TARGET_BRANCH_PREDICTION_HINTS)
15259 return;
15261 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15262 if (x)
15264 int pred_val = XINT (x, 0);
15266 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15267 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15269 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15270 bool cputaken
15271 = final_forward_branch_p (current_output_insn) == 0;
15273 /* Emit hints only in the case where the default branch prediction
15274 heuristics would fail. */
15275 if (taken != cputaken)
15277 /* We use 3e (DS) prefix for taken branches and
15278 2e (CS) prefix for not taken branches. */
15279 if (taken)
15280 fputs ("ds ; ", file);
15281 else
15282 fputs ("cs ; ", file);
15286 return;
15289 case ';':
15290 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15291 putc (';', file);
15292 #endif
15293 return;
15295 case '@':
15296 if (ASSEMBLER_DIALECT == ASM_ATT)
15297 putc ('%', file);
15299 /* The kernel uses a different segment register for performance
15300 reasons; that way a system call does not have to trash the userspace
15301 segment register, which would be expensive. */
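/* Concretely, the code below prints "fs" for 64-bit code unless
   -mcmodel=kernel is in use, and "gs" otherwise (kernel and 32-bit code).  */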
15302 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15303 fputs ("fs", file);
15304 else
15305 fputs ("gs", file);
15306 return;
15308 case '~':
15309 putc (TARGET_AVX2 ? 'i' : 'f', file);
15310 return;
15312 case '^':
15313 if (TARGET_64BIT && Pmode != word_mode)
15314 fputs ("addr32 ", file);
15315 return;
15317 default:
15318 output_operand_lossage ("invalid operand code '%c'", code);
15322 if (REG_P (x))
15323 print_reg (x, code, file);
15325 else if (MEM_P (x))
15327 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15328 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15329 && GET_MODE (x) != BLKmode)
15331 const char * size;
15332 switch (GET_MODE_SIZE (GET_MODE (x)))
15334 case 1: size = "BYTE"; break;
15335 case 2: size = "WORD"; break;
15336 case 4: size = "DWORD"; break;
15337 case 8: size = "QWORD"; break;
15338 case 12: size = "TBYTE"; break;
15339 case 16:
15340 if (GET_MODE (x) == XFmode)
15341 size = "TBYTE";
15342 else
15343 size = "XMMWORD";
15344 break;
15345 case 32: size = "YMMWORD"; break;
15346 case 64: size = "ZMMWORD"; break;
15347 default:
15348 gcc_unreachable ();
15351 /* Check for explicit size override (codes 'b', 'w', 'k',
15352 'q' and 'x') */
15353 if (code == 'b')
15354 size = "BYTE";
15355 else if (code == 'w')
15356 size = "WORD";
15357 else if (code == 'k')
15358 size = "DWORD";
15359 else if (code == 'q')
15360 size = "QWORD";
15361 else if (code == 'x')
15362 size = "XMMWORD";
15364 fputs (size, file);
15365 fputs (" PTR ", file);
15368 x = XEXP (x, 0);
15369 /* Avoid (%rip) for call operands. */
15370 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15371 && !CONST_INT_P (x))
15372 output_addr_const (file, x);
15373 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15374 output_operand_lossage ("invalid constraints for operand");
15375 else
15376 output_address (x);
15379 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15381 REAL_VALUE_TYPE r;
15382 long l;
15384 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15385 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15387 if (ASSEMBLER_DIALECT == ASM_ATT)
15388 putc ('$', file);
15389 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15390 if (code == 'q')
15391 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15392 (unsigned long long) (int) l);
15393 else
15394 fprintf (file, "0x%08x", (unsigned int) l);
15397 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15399 REAL_VALUE_TYPE r;
15400 long l[2];
15402 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15403 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15405 if (ASSEMBLER_DIALECT == ASM_ATT)
15406 putc ('$', file);
15407 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15410 /* These float cases don't actually occur as immediate operands. */
15411 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15413 char dstr[30];
15415 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15416 fputs (dstr, file);
15419 else
15421 /* We have patterns that allow zero sets of memory, for instance.
15422 In 64-bit mode, we should probably support all 8-byte vectors,
15423 since we can in fact encode that into an immediate. */
15424 if (GET_CODE (x) == CONST_VECTOR)
15426 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15427 x = const0_rtx;
15430 if (code != 'P' && code != 'p')
15432 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15434 if (ASSEMBLER_DIALECT == ASM_ATT)
15435 putc ('$', file);
15437 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15438 || GET_CODE (x) == LABEL_REF)
15440 if (ASSEMBLER_DIALECT == ASM_ATT)
15441 putc ('$', file);
15442 else
15443 fputs ("OFFSET FLAT:", file);
15446 if (CONST_INT_P (x))
15447 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15448 else if (flag_pic || MACHOPIC_INDIRECT)
15449 output_pic_addr_const (file, x, code);
15450 else
15451 output_addr_const (file, x);
15455 static bool
15456 ix86_print_operand_punct_valid_p (unsigned char code)
15458 return (code == '@' || code == '*' || code == '+' || code == '&'
15459 || code == ';' || code == '~' || code == '^');
15462 /* Print a memory operand whose address is ADDR. */
15464 static void
15465 ix86_print_operand_address (FILE *file, rtx addr)
15467 struct ix86_address parts;
15468 rtx base, index, disp;
15469 int scale;
15470 int ok;
15471 bool vsib = false;
15472 int code = 0;
15474 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15476 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15477 gcc_assert (parts.index == NULL_RTX);
15478 parts.index = XVECEXP (addr, 0, 1);
15479 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15480 addr = XVECEXP (addr, 0, 0);
15481 vsib = true;
15483 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15485 gcc_assert (TARGET_64BIT);
15486 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15487 code = 'q';
15489 else
15490 ok = ix86_decompose_address (addr, &parts);
15492 gcc_assert (ok);
15494 base = parts.base;
15495 index = parts.index;
15496 disp = parts.disp;
15497 scale = parts.scale;
15499 switch (parts.seg)
15501 case SEG_DEFAULT:
15502 break;
15503 case SEG_FS:
15504 case SEG_GS:
15505 if (ASSEMBLER_DIALECT == ASM_ATT)
15506 putc ('%', file);
15507 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15508 break;
15509 default:
15510 gcc_unreachable ();
15513 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15514 if (TARGET_64BIT && !base && !index)
15516 rtx symbol = disp;
15518 if (GET_CODE (disp) == CONST
15519 && GET_CODE (XEXP (disp, 0)) == PLUS
15520 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15521 symbol = XEXP (XEXP (disp, 0), 0);
15523 if (GET_CODE (symbol) == LABEL_REF
15524 || (GET_CODE (symbol) == SYMBOL_REF
15525 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15526 base = pc_rtx;
15528 if (!base && !index)
15530 /* A displacement-only address requires special attention. */
15532 if (CONST_INT_P (disp))
15534 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15535 fputs ("ds:", file);
15536 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15538 else if (flag_pic)
15539 output_pic_addr_const (file, disp, 0);
15540 else
15541 output_addr_const (file, disp);
15543 else
15545 /* Print SImode register names to force addr32 prefix. */
15546 if (SImode_address_operand (addr, VOIDmode))
15548 #ifdef ENABLE_CHECKING
15549 gcc_assert (TARGET_64BIT);
15550 switch (GET_CODE (addr))
15552 case SUBREG:
15553 gcc_assert (GET_MODE (addr) == SImode);
15554 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15555 break;
15556 case ZERO_EXTEND:
15557 case AND:
15558 gcc_assert (GET_MODE (addr) == DImode);
15559 break;
15560 default:
15561 gcc_unreachable ();
15563 #endif
15564 gcc_assert (!code);
15565 code = 'k';
15567 else if (code == 0
15568 && TARGET_X32
15569 && disp
15570 && CONST_INT_P (disp)
15571 && INTVAL (disp) < -16*1024*1024)
15573 /* X32 runs in 64-bit mode, where displacement, DISP, in
15574 address DISP(%r64), is encoded as 32-bit immediate sign-
15575 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15576 address is %r64 + 0xffffffffbffffd00. When %r64 <
15577 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15578 which is invalid for x32. The correct address is %r64
15579 - 0x40000300 == 0xf7ffdd64. To properly encode
15580 -0x40000300(%r64) for x32, we zero-extend negative
15581 displacement by forcing addr32 prefix which truncates
15582 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15583 zero-extend all negative displacements, including -1(%rsp).
15584 However, for small negative displacements, sign-extension
15585 won't cause overflow. We only zero-extend negative
15586 displacements if they are < -16*1024*1024, which is also the bound
15587 used to check legitimate address displacements for PIC. */
15588 code = 'k';
15591 if (ASSEMBLER_DIALECT == ASM_ATT)
15593 if (disp)
15595 if (flag_pic)
15596 output_pic_addr_const (file, disp, 0);
15597 else if (GET_CODE (disp) == LABEL_REF)
15598 output_asm_label (disp);
15599 else
15600 output_addr_const (file, disp);
15603 putc ('(', file);
15604 if (base)
15605 print_reg (base, code, file);
15606 if (index)
15608 putc (',', file);
15609 print_reg (index, vsib ? 0 : code, file);
15610 if (scale != 1 || vsib)
15611 fprintf (file, ",%d", scale);
15613 putc (')', file);
15615 else
15617 rtx offset = NULL_RTX;
15619 if (disp)
15621 /* Pull out the offset of a symbol; print any symbol itself. */
15622 if (GET_CODE (disp) == CONST
15623 && GET_CODE (XEXP (disp, 0)) == PLUS
15624 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15626 offset = XEXP (XEXP (disp, 0), 1);
15627 disp = gen_rtx_CONST (VOIDmode,
15628 XEXP (XEXP (disp, 0), 0));
15631 if (flag_pic)
15632 output_pic_addr_const (file, disp, 0);
15633 else if (GET_CODE (disp) == LABEL_REF)
15634 output_asm_label (disp);
15635 else if (CONST_INT_P (disp))
15636 offset = disp;
15637 else
15638 output_addr_const (file, disp);
15641 putc ('[', file);
15642 if (base)
15644 print_reg (base, code, file);
15645 if (offset)
15647 if (INTVAL (offset) >= 0)
15648 putc ('+', file);
15649 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15652 else if (offset)
15653 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15654 else
15655 putc ('0', file);
15657 if (index)
15659 putc ('+', file);
15660 print_reg (index, vsib ? 0 : code, file);
15661 if (scale != 1 || vsib)
15662 fprintf (file, "*%d", scale);
15664 putc (']', file);
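/* As an illustration of the two dialects handled above: a base of eax,
   index ebx, scale 4 and displacement 8 comes out as "8(%eax,%ebx,4)"
   in AT&T syntax and as "[eax+8+ebx*4]" in Intel syntax.  */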
15669 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15671 static bool
15672 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15674 rtx op;
15676 if (GET_CODE (x) != UNSPEC)
15677 return false;
15679 op = XVECEXP (x, 0, 0);
15680 switch (XINT (x, 1))
15682 case UNSPEC_GOTTPOFF:
15683 output_addr_const (file, op);
15684 /* FIXME: This might be @TPOFF in Sun ld. */
15685 fputs ("@gottpoff", file);
15686 break;
15687 case UNSPEC_TPOFF:
15688 output_addr_const (file, op);
15689 fputs ("@tpoff", file);
15690 break;
15691 case UNSPEC_NTPOFF:
15692 output_addr_const (file, op);
15693 if (TARGET_64BIT)
15694 fputs ("@tpoff", file);
15695 else
15696 fputs ("@ntpoff", file);
15697 break;
15698 case UNSPEC_DTPOFF:
15699 output_addr_const (file, op);
15700 fputs ("@dtpoff", file);
15701 break;
15702 case UNSPEC_GOTNTPOFF:
15703 output_addr_const (file, op);
15704 if (TARGET_64BIT)
15705 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15706 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15707 else
15708 fputs ("@gotntpoff", file);
15709 break;
15710 case UNSPEC_INDNTPOFF:
15711 output_addr_const (file, op);
15712 fputs ("@indntpoff", file);
15713 break;
15714 #if TARGET_MACHO
15715 case UNSPEC_MACHOPIC_OFFSET:
15716 output_addr_const (file, op);
15717 putc ('-', file);
15718 machopic_output_function_base_name (file);
15719 break;
15720 #endif
15722 case UNSPEC_STACK_CHECK:
15724 int offset;
15726 gcc_assert (flag_split_stack);
15728 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15729 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15730 #else
15731 gcc_unreachable ();
15732 #endif
15734 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15736 break;
15738 default:
15739 return false;
15742 return true;
15745 /* Split one or more double-mode RTL references into pairs of half-mode
15746 references. The RTL can be REG, offsettable MEM, integer constant, or
15747 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15748 split and "num" is its length. lo_half and hi_half are output arrays
15749 that parallel "operands". */
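/* For example, splitting a DImode MEM on a 32-bit target yields two
   SImode MEMs: the low half at offset 0 and the high half at offset 4.  */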
15751 void
15752 split_double_mode (enum machine_mode mode, rtx operands[],
15753 int num, rtx lo_half[], rtx hi_half[])
15755 enum machine_mode half_mode;
15756 unsigned int byte;
15758 switch (mode)
15760 case TImode:
15761 half_mode = DImode;
15762 break;
15763 case DImode:
15764 half_mode = SImode;
15765 break;
15766 default:
15767 gcc_unreachable ();
15770 byte = GET_MODE_SIZE (half_mode);
15772 while (num--)
15774 rtx op = operands[num];
15776 /* simplify_subreg refuses to split volatile memory addresses,
15777 but we still have to handle them. */
15778 if (MEM_P (op))
15780 lo_half[num] = adjust_address (op, half_mode, 0);
15781 hi_half[num] = adjust_address (op, half_mode, byte);
15783 else
15785 lo_half[num] = simplify_gen_subreg (half_mode, op,
15786 GET_MODE (op) == VOIDmode
15787 ? mode : GET_MODE (op), 0);
15788 hi_half[num] = simplify_gen_subreg (half_mode, op,
15789 GET_MODE (op) == VOIDmode
15790 ? mode : GET_MODE (op), byte);
15795 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15796 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15797 is the expression of the binary operation. The output may either be
15798 emitted here, or returned to the caller, like all output_* functions.
15800 There is no guarantee that the operands are the same mode, as they
15801 might be within FLOAT or FLOAT_EXTEND expressions. */
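/* The returned template uses the usual {AT&T|Intel} dialect alternation;
   for instance, one of the stack-register add forms below comes out as
   "faddp\t{%2, %0|%0, %2}".  */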
15803 #ifndef SYSV386_COMPAT
15804 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15805 wants to fix the assemblers because that causes incompatibility
15806 with gcc. No-one wants to fix gcc because that causes
15807 incompatibility with assemblers... You can use the option of
15808 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15809 #define SYSV386_COMPAT 1
15810 #endif
15812 const char *
15813 output_387_binary_op (rtx insn, rtx *operands)
15815 static char buf[40];
15816 const char *p;
15817 const char *ssep;
15818 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15820 #ifdef ENABLE_CHECKING
15821 /* Even if we do not want to check the inputs, this documents input
15822 constraints. Which helps in understanding the following code. */
15823 if (STACK_REG_P (operands[0])
15824 && ((REG_P (operands[1])
15825 && REGNO (operands[0]) == REGNO (operands[1])
15826 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15827 || (REG_P (operands[2])
15828 && REGNO (operands[0]) == REGNO (operands[2])
15829 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15830 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15831 ; /* ok */
15832 else
15833 gcc_assert (is_sse);
15834 #endif
15836 switch (GET_CODE (operands[3]))
15838 case PLUS:
15839 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15840 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15841 p = "fiadd";
15842 else
15843 p = "fadd";
15844 ssep = "vadd";
15845 break;
15847 case MINUS:
15848 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15849 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15850 p = "fisub";
15851 else
15852 p = "fsub";
15853 ssep = "vsub";
15854 break;
15856 case MULT:
15857 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15858 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15859 p = "fimul";
15860 else
15861 p = "fmul";
15862 ssep = "vmul";
15863 break;
15865 case DIV:
15866 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15867 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15868 p = "fidiv";
15869 else
15870 p = "fdiv";
15871 ssep = "vdiv";
15872 break;
15874 default:
15875 gcc_unreachable ();
15878 if (is_sse)
15880 if (TARGET_AVX)
15882 strcpy (buf, ssep);
15883 if (GET_MODE (operands[0]) == SFmode)
15884 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15885 else
15886 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15888 else
15890 strcpy (buf, ssep + 1);
15891 if (GET_MODE (operands[0]) == SFmode)
15892 strcat (buf, "ss\t{%2, %0|%0, %2}");
15893 else
15894 strcat (buf, "sd\t{%2, %0|%0, %2}");
15896 return buf;
15898 strcpy (buf, p);
15900 switch (GET_CODE (operands[3]))
15902 case MULT:
15903 case PLUS:
15904 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15906 rtx temp = operands[2];
15907 operands[2] = operands[1];
15908 operands[1] = temp;
15911 /* We know operands[0] == operands[1]. */
15913 if (MEM_P (operands[2]))
15915 p = "%Z2\t%2";
15916 break;
15919 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15921 if (STACK_TOP_P (operands[0]))
15922 /* How is it that we are storing to a dead operand[2]?
15923 Well, presumably operands[1] is dead too. We can't
15924 store the result to st(0) as st(0) gets popped on this
15925 instruction. Instead store to operands[2] (which I
15926 think has to be st(1)). st(1) will be popped later.
15927 gcc <= 2.8.1 didn't have this check and generated
15928 assembly code that the Unixware assembler rejected. */
15929 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15930 else
15931 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15932 break;
15935 if (STACK_TOP_P (operands[0]))
15936 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15937 else
15938 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15939 break;
15941 case MINUS:
15942 case DIV:
15943 if (MEM_P (operands[1]))
15945 p = "r%Z1\t%1";
15946 break;
15949 if (MEM_P (operands[2]))
15951 p = "%Z2\t%2";
15952 break;
15955 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15957 #if SYSV386_COMPAT
15958 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15959 derived assemblers, confusingly reverse the direction of
15960 the operation for fsub{r} and fdiv{r} when the
15961 destination register is not st(0). The Intel assembler
15962 doesn't have this brain damage. Read !SYSV386_COMPAT to
15963 figure out what the hardware really does. */
15964 if (STACK_TOP_P (operands[0]))
15965 p = "{p\t%0, %2|rp\t%2, %0}";
15966 else
15967 p = "{rp\t%2, %0|p\t%0, %2}";
15968 #else
15969 if (STACK_TOP_P (operands[0]))
15970 /* As above for fmul/fadd, we can't store to st(0). */
15971 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15972 else
15973 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15974 #endif
15975 break;
15978 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15980 #if SYSV386_COMPAT
15981 if (STACK_TOP_P (operands[0]))
15982 p = "{rp\t%0, %1|p\t%1, %0}";
15983 else
15984 p = "{p\t%1, %0|rp\t%0, %1}";
15985 #else
15986 if (STACK_TOP_P (operands[0]))
15987 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15988 else
15989 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15990 #endif
15991 break;
15994 if (STACK_TOP_P (operands[0]))
15996 if (STACK_TOP_P (operands[1]))
15997 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15998 else
15999 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16000 break;
16002 else if (STACK_TOP_P (operands[1]))
16004 #if SYSV386_COMPAT
16005 p = "{\t%1, %0|r\t%0, %1}";
16006 #else
16007 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16008 #endif
16010 else
16012 #if SYSV386_COMPAT
16013 p = "{r\t%2, %0|\t%0, %2}";
16014 #else
16015 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16016 #endif
16018 break;
16020 default:
16021 gcc_unreachable ();
16024 strcat (buf, p);
16025 return buf;
16028 /* Check if a 256bit AVX register is referenced inside of EXP. */
16030 static int
16031 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16033 rtx exp = *pexp;
16035 if (GET_CODE (exp) == SUBREG)
16036 exp = SUBREG_REG (exp);
16038 if (REG_P (exp)
16039 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16040 return 1;
16042 return 0;
16045 /* Return needed mode for entity in optimize_mode_switching pass. */
16047 static int
16048 ix86_avx_u128_mode_needed (rtx insn)
16050 if (CALL_P (insn))
16052 rtx link;
16054 /* Needed mode is set to AVX_U128_CLEAN if there are
16055 no 256bit modes used in function arguments. */
16056 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16057 link;
16058 link = XEXP (link, 1))
16060 if (GET_CODE (XEXP (link, 0)) == USE)
16062 rtx arg = XEXP (XEXP (link, 0), 0);
16064 if (ix86_check_avx256_register (&arg, NULL))
16065 return AVX_U128_DIRTY;
16069 return AVX_U128_CLEAN;
16072 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16073 changes state only when a 256bit register is written to, but we need
16074 to prevent the compiler from moving the optimal insertion point above
16075 an eventual read from a 256bit register. */
16076 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16077 return AVX_U128_DIRTY;
16079 return AVX_U128_ANY;
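/* To summarize: a call whose arguments use no 256bit registers may be
   entered with clean upper halves, any other reference to a 256bit
   register forces DIRTY, and all remaining insns leave the mode
   unconstrained (ANY).  */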
16082 /* Return mode that i387 must be switched into
16083 prior to the execution of insn. */
16085 static int
16086 ix86_i387_mode_needed (int entity, rtx insn)
16088 enum attr_i387_cw mode;
16090 /* The mode UNINITIALIZED is used to store the control word after a
16091 function call or ASM pattern. The mode ANY specifies that the function
16092 has no requirements on the control word and makes no changes in the
16093 bits we are interested in. */
16095 if (CALL_P (insn)
16096 || (NONJUMP_INSN_P (insn)
16097 && (asm_noperands (PATTERN (insn)) >= 0
16098 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16099 return I387_CW_UNINITIALIZED;
16101 if (recog_memoized (insn) < 0)
16102 return I387_CW_ANY;
16104 mode = get_attr_i387_cw (insn);
16106 switch (entity)
16108 case I387_TRUNC:
16109 if (mode == I387_CW_TRUNC)
16110 return mode;
16111 break;
16113 case I387_FLOOR:
16114 if (mode == I387_CW_FLOOR)
16115 return mode;
16116 break;
16118 case I387_CEIL:
16119 if (mode == I387_CW_CEIL)
16120 return mode;
16121 break;
16123 case I387_MASK_PM:
16124 if (mode == I387_CW_MASK_PM)
16125 return mode;
16126 break;
16128 default:
16129 gcc_unreachable ();
16132 return I387_CW_ANY;
16135 /* Return mode that entity must be switched into
16136 prior to the execution of insn. */
16139 ix86_mode_needed (int entity, rtx insn)
16141 switch (entity)
16143 case AVX_U128:
16144 return ix86_avx_u128_mode_needed (insn);
16145 case I387_TRUNC:
16146 case I387_FLOOR:
16147 case I387_CEIL:
16148 case I387_MASK_PM:
16149 return ix86_i387_mode_needed (entity, insn);
16150 default:
16151 gcc_unreachable ();
16153 return 0;
16156 /* Check if a 256bit AVX register is referenced in stores. */
16158 static void
16159 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16161 if (ix86_check_avx256_register (&dest, NULL))
16163 bool *used = (bool *) data;
16164 *used = true;
16168 /* Calculate mode of upper 128bit AVX registers after the insn. */
16170 static int
16171 ix86_avx_u128_mode_after (int mode, rtx insn)
16173 rtx pat = PATTERN (insn);
16175 if (vzeroupper_operation (pat, VOIDmode)
16176 || vzeroall_operation (pat, VOIDmode))
16177 return AVX_U128_CLEAN;
16179 /* We know that the state is clean after a CALL insn if no
16180 256bit register is used for the function return value. */
16181 if (CALL_P (insn))
16183 bool avx_reg256_found = false;
16184 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16186 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16189 /* Otherwise, return current mode. Remember that if insn
16190 references AVX 256bit registers, the mode was already changed
16191 to DIRTY from MODE_NEEDED. */
16192 return mode;
16195 /* Return the mode that an insn results in. */
16198 ix86_mode_after (int entity, int mode, rtx insn)
16200 switch (entity)
16202 case AVX_U128:
16203 return ix86_avx_u128_mode_after (mode, insn);
16204 case I387_TRUNC:
16205 case I387_FLOOR:
16206 case I387_CEIL:
16207 case I387_MASK_PM:
16208 return mode;
16209 default:
16210 gcc_unreachable ();
16214 static int
16215 ix86_avx_u128_mode_entry (void)
16217 tree arg;
16219 /* Entry mode is set to AVX_U128_DIRTY if there are
16220 256bit modes used in function arguments. */
16221 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16222 arg = TREE_CHAIN (arg))
16224 rtx incoming = DECL_INCOMING_RTL (arg);
16226 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16227 return AVX_U128_DIRTY;
16230 return AVX_U128_CLEAN;
16233 /* Return a mode that ENTITY is assumed to be
16234 switched to at function entry. */
16237 ix86_mode_entry (int entity)
16239 switch (entity)
16241 case AVX_U128:
16242 return ix86_avx_u128_mode_entry ();
16243 case I387_TRUNC:
16244 case I387_FLOOR:
16245 case I387_CEIL:
16246 case I387_MASK_PM:
16247 return I387_CW_ANY;
16248 default:
16249 gcc_unreachable ();
16253 static int
16254 ix86_avx_u128_mode_exit (void)
16256 rtx reg = crtl->return_rtx;
16258 /* Exit mode is set to AVX_U128_DIRTY if there are
16259 256bit modes used in the function return register. */
16260 if (reg && ix86_check_avx256_register (&reg, NULL))
16261 return AVX_U128_DIRTY;
16263 return AVX_U128_CLEAN;
16266 /* Return a mode that ENTITY is assumed to be
16267 switched to at function exit. */
16270 ix86_mode_exit (int entity)
16272 switch (entity)
16274 case AVX_U128:
16275 return ix86_avx_u128_mode_exit ();
16276 case I387_TRUNC:
16277 case I387_FLOOR:
16278 case I387_CEIL:
16279 case I387_MASK_PM:
16280 return I387_CW_ANY;
16281 default:
16282 gcc_unreachable ();
16286 /* Output code to initialize control word copies used by trunc?f?i and
16287 rounding patterns. CURRENT_MODE is set to current control word,
16288 while NEW_MODE is set to new control word. */
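/* Background for the magic constants below: the rounding-control field
   occupies bits 10-11 of the x87 control word (00 = nearest, 01 = down,
   10 = up, 11 = toward zero), so OR-ing in 0x0c00 selects truncation
   while 0x0400 and 0x0800 select floor and ceil; bit 5 (0x0020) masks
   the precision exception.  */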
16290 static void
16291 emit_i387_cw_initialization (int mode)
16293 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16294 rtx new_mode;
16296 enum ix86_stack_slot slot;
16298 rtx reg = gen_reg_rtx (HImode);
16300 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16301 emit_move_insn (reg, copy_rtx (stored_mode));
16303 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16304 || optimize_insn_for_size_p ())
16306 switch (mode)
16308 case I387_CW_TRUNC:
16309 /* round toward zero (truncate) */
16310 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16311 slot = SLOT_CW_TRUNC;
16312 break;
16314 case I387_CW_FLOOR:
16315 /* round down toward -oo */
16316 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16317 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16318 slot = SLOT_CW_FLOOR;
16319 break;
16321 case I387_CW_CEIL:
16322 /* round up toward +oo */
16323 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16324 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16325 slot = SLOT_CW_CEIL;
16326 break;
16328 case I387_CW_MASK_PM:
16329 /* mask precision exception for nearbyint() */
16330 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16331 slot = SLOT_CW_MASK_PM;
16332 break;
16334 default:
16335 gcc_unreachable ();
16338 else
16340 switch (mode)
16342 case I387_CW_TRUNC:
16343 /* round toward zero (truncate) */
16344 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16345 slot = SLOT_CW_TRUNC;
16346 break;
16348 case I387_CW_FLOOR:
16349 /* round down toward -oo */
16350 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16351 slot = SLOT_CW_FLOOR;
16352 break;
16354 case I387_CW_CEIL:
16355 /* round up toward +oo */
16356 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16357 slot = SLOT_CW_CEIL;
16358 break;
16360 case I387_CW_MASK_PM:
16361 /* mask precision exception for nearbyint() */
16362 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16363 slot = SLOT_CW_MASK_PM;
16364 break;
16366 default:
16367 gcc_unreachable ();
16371 gcc_assert (slot < MAX_386_STACK_LOCALS);
16373 new_mode = assign_386_stack_local (HImode, slot);
16374 emit_move_insn (new_mode, reg);
16377 /* Emit vzeroupper. */
16379 void
16380 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16382 int i;
16384 /* Cancel automatic vzeroupper insertion if there are
16385 live call-saved SSE registers at the insertion point. */
16387 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16388 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16389 return;
16391 if (TARGET_64BIT)
16392 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16393 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16394 return;
16396 emit_insn (gen_avx_vzeroupper ());
16399 /* Generate one or more insns to set ENTITY to MODE. */
16401 void
16402 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16404 switch (entity)
16406 case AVX_U128:
16407 if (mode == AVX_U128_CLEAN)
16408 ix86_avx_emit_vzeroupper (regs_live);
16409 break;
16410 case I387_TRUNC:
16411 case I387_FLOOR:
16412 case I387_CEIL:
16413 case I387_MASK_PM:
16414 if (mode != I387_CW_ANY
16415 && mode != I387_CW_UNINITIALIZED)
16416 emit_i387_cw_initialization (mode);
16417 break;
16418 default:
16419 gcc_unreachable ();
16423 /* Output code for INSN to convert a float to a signed int. OPERANDS
16424 are the insn operands. The output may be [HSD]Imode and the input
16425 operand may be [SDX]Fmode. */
16427 const char *
16428 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16430 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16431 int dimode_p = GET_MODE (operands[0]) == DImode;
16432 int round_mode = get_attr_i387_cw (insn);
16434 /* Jump through a hoop or two for DImode, since the hardware has no
16435 non-popping instruction. We used to do this a different way, but
16436 that was somewhat fragile and broke with post-reload splitters. */
16437 if ((dimode_p || fisttp) && !stack_top_dies)
16438 output_asm_insn ("fld\t%y1", operands);
16440 gcc_assert (STACK_TOP_P (operands[1]));
16441 gcc_assert (MEM_P (operands[0]));
16442 gcc_assert (GET_MODE (operands[1]) != TFmode);
16444 if (fisttp)
16445 output_asm_insn ("fisttp%Z0\t%0", operands);
16446 else
16448 if (round_mode != I387_CW_ANY)
16449 output_asm_insn ("fldcw\t%3", operands);
16450 if (stack_top_dies || dimode_p)
16451 output_asm_insn ("fistp%Z0\t%0", operands);
16452 else
16453 output_asm_insn ("fist%Z0\t%0", operands);
16454 if (round_mode != I387_CW_ANY)
16455 output_asm_insn ("fldcw\t%2", operands);
16458 return "";
16461 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16462 have the values zero or one, indicates the ffreep insn's operand
16463 from the OPERANDS array. */
16465 static const char *
16466 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16468 if (TARGET_USE_FFREEP)
16469 #ifdef HAVE_AS_IX86_FFREEP
16470 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16471 #else
16473 static char retval[32];
16474 int regno = REGNO (operands[opno]);
16476 gcc_assert (STACK_REGNO_P (regno));
16478 regno -= FIRST_STACK_REG;
16480 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16481 return retval;
16483 #endif
16485 return opno ? "fstp\t%y1" : "fstp\t%y0";
16489 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16490 should be used. UNORDERED_P is true when fucom should be used. */
16492 const char *
16493 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16495 int stack_top_dies;
16496 rtx cmp_op0, cmp_op1;
16497 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16499 if (eflags_p)
16501 cmp_op0 = operands[0];
16502 cmp_op1 = operands[1];
16504 else
16506 cmp_op0 = operands[1];
16507 cmp_op1 = operands[2];
16510 if (is_sse)
16512 if (GET_MODE (operands[0]) == SFmode)
16513 if (unordered_p)
16514 return "%vucomiss\t{%1, %0|%0, %1}";
16515 else
16516 return "%vcomiss\t{%1, %0|%0, %1}";
16517 else
16518 if (unordered_p)
16519 return "%vucomisd\t{%1, %0|%0, %1}";
16520 else
16521 return "%vcomisd\t{%1, %0|%0, %1}";
16524 gcc_assert (STACK_TOP_P (cmp_op0));
16526 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16528 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16530 if (stack_top_dies)
16532 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16533 return output_387_ffreep (operands, 1);
16535 else
16536 return "ftst\n\tfnstsw\t%0";
16539 if (STACK_REG_P (cmp_op1)
16540 && stack_top_dies
16541 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16542 && REGNO (cmp_op1) != FIRST_STACK_REG)
16544 /* If the top of the 387 stack dies, and the other operand
16545 is also a stack register that dies, then this must be an
16546 `fcompp' float compare. */
16548 if (eflags_p)
16550 /* There is no double popping fcomi variant. Fortunately,
16551 eflags is immune from the fstp's cc clobbering. */
16552 if (unordered_p)
16553 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16554 else
16555 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16556 return output_387_ffreep (operands, 0);
16558 else
16560 if (unordered_p)
16561 return "fucompp\n\tfnstsw\t%0";
16562 else
16563 return "fcompp\n\tfnstsw\t%0";
16566 else
16568 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
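/* For example, eflags_p = 1, unordered_p = 1 and stack_top_dies = 1 on a
   floating-point operand gives mask 0b1011 = 11, selecting "fucomip".  */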
16570 static const char * const alt[16] =
16572 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16573 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16574 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16575 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16577 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16578 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16579 NULL,
16580 NULL,
16582 "fcomi\t{%y1, %0|%0, %y1}",
16583 "fcomip\t{%y1, %0|%0, %y1}",
16584 "fucomi\t{%y1, %0|%0, %y1}",
16585 "fucomip\t{%y1, %0|%0, %y1}",
16587 NULL,
16588 NULL,
16589 NULL,
16590 NULL
16593 int mask;
16594 const char *ret;
16596 mask = eflags_p << 3;
16597 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16598 mask |= unordered_p << 1;
16599 mask |= stack_top_dies;
16601 gcc_assert (mask < 16);
16602 ret = alt[mask];
16603 gcc_assert (ret);
16605 return ret;
16609 void
16610 ix86_output_addr_vec_elt (FILE *file, int value)
16612 const char *directive = ASM_LONG;
16614 #ifdef ASM_QUAD
16615 if (TARGET_LP64)
16616 directive = ASM_QUAD;
16617 #else
16618 gcc_assert (!TARGET_64BIT);
16619 #endif
16621 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16624 void
16625 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16627 const char *directive = ASM_LONG;
16629 #ifdef ASM_QUAD
16630 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16631 directive = ASM_QUAD;
16632 #else
16633 gcc_assert (!TARGET_64BIT);
16634 #endif
16635 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16636 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16637 fprintf (file, "%s%s%d-%s%d\n",
16638 directive, LPREFIX, value, LPREFIX, rel);
16639 else if (HAVE_AS_GOTOFF_IN_DATA)
16640 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16641 #if TARGET_MACHO
16642 else if (TARGET_MACHO)
16644 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16645 machopic_output_function_base_name (file);
16646 putc ('\n', file);
16648 #endif
16649 else
16650 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16651 GOT_SYMBOL_NAME, LPREFIX, value);
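/* As an illustration (assuming the usual ".L" local label prefix, VALUE 5
   and REL 2): 64-bit targets emit ".long .L5-.L2" (or ".quad" when the
   case vector is DImode), targets with @GOTOFF support in data emit
   ".long .L5@GOTOFF", and the final fallback emits
   ".long _GLOBAL_OFFSET_TABLE_+[.-.L5]".  */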
16654 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16655 for the target. */
16657 void
16658 ix86_expand_clear (rtx dest)
16660 rtx tmp;
16662 /* We play register width games, which are only valid after reload. */
16663 gcc_assert (reload_completed);
16665 /* Avoid HImode and its attendant prefix byte. */
16666 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16667 dest = gen_rtx_REG (SImode, REGNO (dest));
16668 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16670 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16671 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16673 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16674 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16677 emit_insn (tmp);
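/* Illustrative result: with the flags clobber attached the insn matches
   the *movsi_xor / *movdi_xor_rex64 patterns and typically assembles to
   "xorl %eax, %eax"; without the clobber a plain "movl $0, %eax" is
   emitted (the register name is just an example).  */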
16680 /* X is an unchanging MEM. If it is a constant pool reference, return
16681 the constant pool rtx, else NULL. */
16684 maybe_get_pool_constant (rtx x)
16686 x = ix86_delegitimize_address (XEXP (x, 0));
16688 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16689 return get_pool_constant (x);
16691 return NULL_RTX;
16694 void
16695 ix86_expand_move (enum machine_mode mode, rtx operands[])
16697 rtx op0, op1;
16698 enum tls_model model;
16700 op0 = operands[0];
16701 op1 = operands[1];
16703 if (GET_CODE (op1) == SYMBOL_REF)
16705 rtx tmp;
16707 model = SYMBOL_REF_TLS_MODEL (op1);
16708 if (model)
16710 op1 = legitimize_tls_address (op1, model, true);
16711 op1 = force_operand (op1, op0);
16712 if (op1 == op0)
16713 return;
16714 op1 = convert_to_mode (mode, op1, 1);
16716 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16717 op1 = tmp;
16719 else if (GET_CODE (op1) == CONST
16720 && GET_CODE (XEXP (op1, 0)) == PLUS
16721 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16723 rtx addend = XEXP (XEXP (op1, 0), 1);
16724 rtx symbol = XEXP (XEXP (op1, 0), 0);
16725 rtx tmp;
16727 model = SYMBOL_REF_TLS_MODEL (symbol);
16728 if (model)
16729 tmp = legitimize_tls_address (symbol, model, true);
16730 else
16731 tmp = legitimize_pe_coff_symbol (symbol, true);
16733 if (tmp)
16735 tmp = force_operand (tmp, NULL);
16736 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16737 op0, 1, OPTAB_DIRECT);
16738 if (tmp == op0)
16739 return;
16740 op1 = convert_to_mode (mode, tmp, 1);
16744 if ((flag_pic || MACHOPIC_INDIRECT)
16745 && symbolic_operand (op1, mode))
16747 if (TARGET_MACHO && !TARGET_64BIT)
16749 #if TARGET_MACHO
16750 /* dynamic-no-pic */
16751 if (MACHOPIC_INDIRECT)
16753 rtx temp = ((reload_in_progress
16754 || ((op0 && REG_P (op0))
16755 && mode == Pmode))
16756 ? op0 : gen_reg_rtx (Pmode));
16757 op1 = machopic_indirect_data_reference (op1, temp);
16758 if (MACHOPIC_PURE)
16759 op1 = machopic_legitimize_pic_address (op1, mode,
16760 temp == op1 ? 0 : temp);
16762 if (op0 != op1 && GET_CODE (op0) != MEM)
16764 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16765 emit_insn (insn);
16766 return;
16768 if (GET_CODE (op0) == MEM)
16769 op1 = force_reg (Pmode, op1);
16770 else
16772 rtx temp = op0;
16773 if (GET_CODE (temp) != REG)
16774 temp = gen_reg_rtx (Pmode);
16775 temp = legitimize_pic_address (op1, temp);
16776 if (temp == op0)
16777 return;
16778 op1 = temp;
16780 /* dynamic-no-pic */
16781 #endif
16783 else
16785 if (MEM_P (op0))
16786 op1 = force_reg (mode, op1);
16787 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16789 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16790 op1 = legitimize_pic_address (op1, reg);
16791 if (op0 == op1)
16792 return;
16793 op1 = convert_to_mode (mode, op1, 1);
16797 else
16799 if (MEM_P (op0)
16800 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16801 || !push_operand (op0, mode))
16802 && MEM_P (op1))
16803 op1 = force_reg (mode, op1);
16805 if (push_operand (op0, mode)
16806 && ! general_no_elim_operand (op1, mode))
16807 op1 = copy_to_mode_reg (mode, op1);
16809 /* Force large constants in 64bit compilation into a register
16810 to get them CSEed. */
16811 if (can_create_pseudo_p ()
16812 && (mode == DImode) && TARGET_64BIT
16813 && immediate_operand (op1, mode)
16814 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16815 && !register_operand (op0, mode)
16816 && optimize)
16817 op1 = copy_to_mode_reg (mode, op1);
16819 if (can_create_pseudo_p ()
16820 && FLOAT_MODE_P (mode)
16821 && GET_CODE (op1) == CONST_DOUBLE)
16823 /* If we are loading a floating point constant to a register,
16824 force the value to memory now, since we'll get better code
16825 out the back end. */
16827 op1 = validize_mem (force_const_mem (mode, op1));
16828 if (!register_operand (op0, mode))
16830 rtx temp = gen_reg_rtx (mode);
16831 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16832 emit_move_insn (op0, temp);
16833 return;
16838 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16841 void
16842 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16844 rtx op0 = operands[0], op1 = operands[1];
16845 unsigned int align = GET_MODE_ALIGNMENT (mode);
16847 if (push_operand (op0, VOIDmode))
16848 op0 = emit_move_resolve_push (mode, op0);
16850 /* Force constants other than zero into memory. We do not know how
16851 the instructions used to build constants modify the upper 64 bits
16852 of the register; once we have that information we may be able
16853 to handle some of them more efficiently. */
16854 if (can_create_pseudo_p ()
16855 && register_operand (op0, mode)
16856 && (CONSTANT_P (op1)
16857 || (GET_CODE (op1) == SUBREG
16858 && CONSTANT_P (SUBREG_REG (op1))))
16859 && !standard_sse_constant_p (op1))
16860 op1 = validize_mem (force_const_mem (mode, op1));
16862 /* We need to check memory alignment for SSE modes since attributes
16863 can make operands unaligned. */
16864 if (can_create_pseudo_p ()
16865 && SSE_REG_MODE_P (mode)
16866 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16867 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16869 rtx tmp[2];
16871 /* ix86_expand_vector_move_misalign() does not like constants ... */
16872 if (CONSTANT_P (op1)
16873 || (GET_CODE (op1) == SUBREG
16874 && CONSTANT_P (SUBREG_REG (op1))))
16875 op1 = validize_mem (force_const_mem (mode, op1));
16877 /* ... nor both arguments in memory. */
16878 if (!register_operand (op0, mode)
16879 && !register_operand (op1, mode))
16880 op1 = force_reg (mode, op1);
16882 tmp[0] = op0; tmp[1] = op1;
16883 ix86_expand_vector_move_misalign (mode, tmp);
16884 return;
16887 /* Make operand1 a register if it isn't already. */
16888 if (can_create_pseudo_p ()
16889 && !register_operand (op0, mode)
16890 && !register_operand (op1, mode))
16892 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16893 return;
16896 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16899 /* Split 32-byte AVX unaligned load and store if needed. */
16901 static void
16902 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16904 rtx m;
16905 rtx (*extract) (rtx, rtx, rtx);
16906 rtx (*load_unaligned) (rtx, rtx);
16907 rtx (*store_unaligned) (rtx, rtx);
16908 enum machine_mode mode;
16910 switch (GET_MODE (op0))
16912 default:
16913 gcc_unreachable ();
16914 case V32QImode:
16915 extract = gen_avx_vextractf128v32qi;
16916 load_unaligned = gen_avx_loaddquv32qi;
16917 store_unaligned = gen_avx_storedquv32qi;
16918 mode = V16QImode;
16919 break;
16920 case V8SFmode:
16921 extract = gen_avx_vextractf128v8sf;
16922 load_unaligned = gen_avx_loadups256;
16923 store_unaligned = gen_avx_storeups256;
16924 mode = V4SFmode;
16925 break;
16926 case V4DFmode:
16927 extract = gen_avx_vextractf128v4df;
16928 load_unaligned = gen_avx_loadupd256;
16929 store_unaligned = gen_avx_storeupd256;
16930 mode = V2DFmode;
16931 break;
16934 if (MEM_P (op1))
16936 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16938 rtx r = gen_reg_rtx (mode);
16939 m = adjust_address (op1, mode, 0);
16940 emit_move_insn (r, m);
16941 m = adjust_address (op1, mode, 16);
16942 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16943 emit_move_insn (op0, r);
16945 /* Normal *mov<mode>_internal pattern will handle
16946 unaligned loads just fine if misaligned_operand
16947 is true, and without the UNSPEC it can be combined
16948 with arithmetic instructions. */
16949 else if (misaligned_operand (op1, GET_MODE (op1)))
16950 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16951 else
16952 emit_insn (load_unaligned (op0, op1));
16954 else if (MEM_P (op0))
16956 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16958 m = adjust_address (op0, mode, 0);
16959 emit_insn (extract (m, op1, const0_rtx));
16960 m = adjust_address (op0, mode, 16);
16961 emit_insn (extract (m, op1, const1_rtx));
16963 else
16964 emit_insn (store_unaligned (op0, op1));
16966 else
16967 gcc_unreachable ();
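/* Rough shape of the split (registers and addresses are illustrative
   only): with TARGET_AVX256_SPLIT_UNALIGNED_LOAD an unaligned V4DF load
   becomes approximately
       vmovupd      (%rax), %xmm0
       vinsertf128  $1, 16(%rax), %ymm0, %ymm0
   and with TARGET_AVX256_SPLIT_UNALIGNED_STORE an unaligned store becomes
       vextractf128 $0, %ymm0, (%rax)
       vextractf128 $1, %ymm0, 16(%rax)
   instead of a single 256-bit vmovupd.  */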
16970 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16971 straight to ix86_expand_vector_move. */
16972 /* Code generation for scalar reg-reg moves of single and double precision data:
16973 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16974 movaps reg, reg
16975 else
16976 movss reg, reg
16977 if (x86_sse_partial_reg_dependency == true)
16978 movapd reg, reg
16979 else
16980 movsd reg, reg
16982 Code generation for scalar loads of double precision data:
16983 if (x86_sse_split_regs == true)
16984 movlpd mem, reg (gas syntax)
16985 else
16986 movsd mem, reg
16988 Code generation for unaligned packed loads of single precision data
16989 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16990 if (x86_sse_unaligned_move_optimal)
16991 movups mem, reg
16993 if (x86_sse_partial_reg_dependency == true)
16995 xorps reg, reg
16996 movlps mem, reg
16997 movhps mem+8, reg
16999 else
17001 movlps mem, reg
17002 movhps mem+8, reg
17005 Code generation for unaligned packed loads of double precision data
17006 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17007 if (x86_sse_unaligned_move_optimal)
17008 movupd mem, reg
17010 if (x86_sse_split_regs == true)
17012 movlpd mem, reg
17013 movhpd mem+8, reg
17015 else
17017 movsd mem, reg
17018 movhpd mem+8, reg
17022 void
17023 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17025 rtx op0, op1, orig_op0 = NULL_RTX, m;
17026 rtx (*load_unaligned) (rtx, rtx);
17027 rtx (*store_unaligned) (rtx, rtx);
17029 op0 = operands[0];
17030 op1 = operands[1];
17032 if (GET_MODE_SIZE (mode) == 64)
17034 switch (GET_MODE_CLASS (mode))
17036 case MODE_VECTOR_INT:
17037 case MODE_INT:
17038 if (GET_MODE (op0) != V16SImode)
17040 if (!MEM_P (op0))
17042 orig_op0 = op0;
17043 op0 = gen_reg_rtx (V16SImode);
17045 else
17046 op0 = gen_lowpart (V16SImode, op0);
17048 op1 = gen_lowpart (V16SImode, op1);
17049 /* FALLTHRU */
17051 case MODE_VECTOR_FLOAT:
17052 switch (GET_MODE (op0))
17054 default:
17055 gcc_unreachable ();
17056 case V16SImode:
17057 load_unaligned = gen_avx512f_loaddquv16si;
17058 store_unaligned = gen_avx512f_storedquv16si;
17059 break;
17060 case V16SFmode:
17061 load_unaligned = gen_avx512f_loadups512;
17062 store_unaligned = gen_avx512f_storeups512;
17063 break;
17064 case V8DFmode:
17065 load_unaligned = gen_avx512f_loadupd512;
17066 store_unaligned = gen_avx512f_storeupd512;
17067 break;
17070 if (MEM_P (op1))
17071 emit_insn (load_unaligned (op0, op1));
17072 else if (MEM_P (op0))
17073 emit_insn (store_unaligned (op0, op1));
17074 else
17075 gcc_unreachable ();
17076 if (orig_op0)
17077 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17078 break;
17080 default:
17081 gcc_unreachable ();
17084 return;
17087 if (TARGET_AVX
17088 && GET_MODE_SIZE (mode) == 32)
17090 switch (GET_MODE_CLASS (mode))
17092 case MODE_VECTOR_INT:
17093 case MODE_INT:
17094 if (GET_MODE (op0) != V32QImode)
17096 if (!MEM_P (op0))
17098 orig_op0 = op0;
17099 op0 = gen_reg_rtx (V32QImode);
17101 else
17102 op0 = gen_lowpart (V32QImode, op0);
17104 op1 = gen_lowpart (V32QImode, op1);
17105 /* FALLTHRU */
17107 case MODE_VECTOR_FLOAT:
17108 ix86_avx256_split_vector_move_misalign (op0, op1);
17109 if (orig_op0)
17110 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17111 break;
17113 default:
17114 gcc_unreachable ();
17117 return;
17120 if (MEM_P (op1))
17122 /* Normal *mov<mode>_internal pattern will handle
17123 unaligned loads just fine if misaligned_operand
17124 is true, and without the UNSPEC it can be combined
17125 with arithmetic instructions. */
17126 if (TARGET_AVX
17127 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17129 && misaligned_operand (op1, GET_MODE (op1)))
17130 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17131 /* ??? If we have typed data, then it would appear that using
17132 movdqu is the only way to get unaligned data loaded with
17133 integer type. */
17134 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17136 if (GET_MODE (op0) != V16QImode)
17138 orig_op0 = op0;
17139 op0 = gen_reg_rtx (V16QImode);
17141 op1 = gen_lowpart (V16QImode, op1);
17142 /* We will eventually emit movups based on insn attributes. */
17143 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17144 if (orig_op0)
17145 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17147 else if (TARGET_SSE2 && mode == V2DFmode)
17149 rtx zero;
17151 if (TARGET_AVX
17152 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17153 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17154 || optimize_insn_for_size_p ())
17156 /* We will eventually emit movups based on insn attributes. */
17157 emit_insn (gen_sse2_loadupd (op0, op1));
17158 return;
17161 /* When SSE registers are split into halves, we can avoid
17162 writing to the top half twice. */
17163 if (TARGET_SSE_SPLIT_REGS)
17165 emit_clobber (op0);
17166 zero = op0;
17168 else
17170 /* ??? Not sure about the best option for the Intel chips.
17171 The following would seem to satisfy; the register is
17172 entirely cleared, breaking the dependency chain. We
17173 then store to the upper half, with a dependency depth
17174 of one. A rumor has it that Intel recommends two movsd
17175 followed by an unpacklpd, but this is unconfirmed. And
17176 given that the dependency depth of the unpacklpd would
17177 still be one, I'm not sure why this would be better. */
17178 zero = CONST0_RTX (V2DFmode);
17181 m = adjust_address (op1, DFmode, 0);
17182 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17183 m = adjust_address (op1, DFmode, 8);
17184 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17186 else
17188 rtx t;
17190 if (TARGET_AVX
17191 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17192 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17193 || optimize_insn_for_size_p ())
17195 if (GET_MODE (op0) != V4SFmode)
17197 orig_op0 = op0;
17198 op0 = gen_reg_rtx (V4SFmode);
17200 op1 = gen_lowpart (V4SFmode, op1);
17201 emit_insn (gen_sse_loadups (op0, op1));
17202 if (orig_op0)
17203 emit_move_insn (orig_op0,
17204 gen_lowpart (GET_MODE (orig_op0), op0));
17205 return;
17208 if (mode != V4SFmode)
17209 t = gen_reg_rtx (V4SFmode);
17210 else
17211 t = op0;
17213 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17214 emit_move_insn (t, CONST0_RTX (V4SFmode));
17215 else
17216 emit_clobber (t);
17218 m = adjust_address (op1, V2SFmode, 0);
17219 emit_insn (gen_sse_loadlps (t, t, m));
17220 m = adjust_address (op1, V2SFmode, 8);
17221 emit_insn (gen_sse_loadhps (t, t, m));
17222 if (mode != V4SFmode)
17223 emit_move_insn (op0, gen_lowpart (mode, t));
17226 else if (MEM_P (op0))
17228 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17230 op0 = gen_lowpart (V16QImode, op0);
17231 op1 = gen_lowpart (V16QImode, op1);
17232 /* We will eventually emit movups based on insn attributes. */
17233 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17235 else if (TARGET_SSE2 && mode == V2DFmode)
17237 if (TARGET_AVX
17238 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17239 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17240 || optimize_insn_for_size_p ())
17241 /* We will eventually emit movups based on insn attributes. */
17242 emit_insn (gen_sse2_storeupd (op0, op1));
17243 else
17245 m = adjust_address (op0, DFmode, 0);
17246 emit_insn (gen_sse2_storelpd (m, op1));
17247 m = adjust_address (op0, DFmode, 8);
17248 emit_insn (gen_sse2_storehpd (m, op1));
17251 else
17253 if (mode != V4SFmode)
17254 op1 = gen_lowpart (V4SFmode, op1);
17256 if (TARGET_AVX
17257 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17258 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17259 || optimize_insn_for_size_p ())
17261 op0 = gen_lowpart (V4SFmode, op0);
17262 emit_insn (gen_sse_storeups (op0, op1));
17264 else
17266 m = adjust_address (op0, V2SFmode, 0);
17267 emit_insn (gen_sse_storelps (m, op1));
17268 m = adjust_address (op0, V2SFmode, 8);
17269 emit_insn (gen_sse_storehps (m, op1));
17273 else
17274 gcc_unreachable ();
17277 /* Helper function of ix86_fixup_binary_operands to canonicalize
17278 operand order. Returns true if the operands should be swapped. */
17280 static bool
17281 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17282 rtx operands[])
17284 rtx dst = operands[0];
17285 rtx src1 = operands[1];
17286 rtx src2 = operands[2];
17288 /* If the operation is not commutative, we can't do anything. */
17289 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17290 return false;
17292 /* Highest priority is that src1 should match dst. */
17293 if (rtx_equal_p (dst, src1))
17294 return false;
17295 if (rtx_equal_p (dst, src2))
17296 return true;
17298 /* Next highest priority is that immediate constants come second. */
17299 if (immediate_operand (src2, mode))
17300 return false;
17301 if (immediate_operand (src1, mode))
17302 return true;
17304 /* Lowest priority is that memory references should come second. */
17305 if (MEM_P (src2))
17306 return false;
17307 if (MEM_P (src1))
17308 return true;
17310 return false;
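/* Example (illustrative): for a commutative PLUS with dst == src2, say
   (set r1 (plus (mem) (reg r1))), the operands are swapped so that src1
   matches dst and the insn can end up as a single "addl (mem), %reg"
   style instruction; likewise an immediate first operand such as
   (plus (const_int 4) (reg r2)) is moved into the second position.  */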
17314 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17315 destination to use for the operation. If different from the true
17316 destination in operands[0], a copy operation will be required. */
17319 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17320 rtx operands[])
17322 rtx dst = operands[0];
17323 rtx src1 = operands[1];
17324 rtx src2 = operands[2];
17326 /* Canonicalize operand order. */
17327 if (ix86_swap_binary_operands_p (code, mode, operands))
17329 rtx temp;
17331 /* It is invalid to swap operands of different modes. */
17332 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17334 temp = src1;
17335 src1 = src2;
17336 src2 = temp;
17339 /* Both source operands cannot be in memory. */
17340 if (MEM_P (src1) && MEM_P (src2))
17342 /* Optimization: Only read from memory once. */
17343 if (rtx_equal_p (src1, src2))
17345 src2 = force_reg (mode, src2);
17346 src1 = src2;
17348 else if (rtx_equal_p (dst, src1))
17349 src2 = force_reg (mode, src2);
17350 else
17351 src1 = force_reg (mode, src1);
17354 /* If the destination is memory, and we do not have matching source
17355 operands, do things in registers. */
17356 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17357 dst = gen_reg_rtx (mode);
17359 /* Source 1 cannot be a constant. */
17360 if (CONSTANT_P (src1))
17361 src1 = force_reg (mode, src1);
17363 /* Source 1 cannot be a non-matching memory. */
17364 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17365 src1 = force_reg (mode, src1);
17367 /* Improve address combine. */
17368 if (code == PLUS
17369 && GET_MODE_CLASS (mode) == MODE_INT
17370 && MEM_P (src2))
17371 src2 = force_reg (mode, src2);
17373 operands[1] = src1;
17374 operands[2] = src2;
17375 return dst;
17378 /* Similarly, but assume that the destination has already been
17379 set up properly. */
17381 void
17382 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17383 enum machine_mode mode, rtx operands[])
17385 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17386 gcc_assert (dst == operands[0]);
17389 /* Attempt to expand a binary operator. Make the expansion closer to the
17390 actual machine, than just general_operand, which will allow 3 separate
17391 memory references (one output, two input) in a single insn. */
17393 void
17394 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17395 rtx operands[])
17397 rtx src1, src2, dst, op, clob;
17399 dst = ix86_fixup_binary_operands (code, mode, operands);
17400 src1 = operands[1];
17401 src2 = operands[2];
17403 /* Emit the instruction. */
17405 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17406 if (reload_in_progress)
17408 /* Reload doesn't know about the flags register, and doesn't know that
17409 it doesn't want to clobber it. We can only do this with PLUS. */
17410 gcc_assert (code == PLUS);
17411 emit_insn (op);
17413 else if (reload_completed
17414 && code == PLUS
17415 && !rtx_equal_p (dst, src1))
17417 /* This is going to be an LEA; avoid splitting it later. */
17418 emit_insn (op);
17420 else
17422 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17423 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17426 /* Fix up the destination if needed. */
17427 if (dst != operands[0])
17428 emit_move_insn (operands[0], dst);
17431 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17432 the given OPERANDS. */
17434 void
17435 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17436 rtx operands[])
17438 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17439 if (GET_CODE (operands[1]) == SUBREG)
17441 op1 = operands[1];
17442 op2 = operands[2];
17444 else if (GET_CODE (operands[2]) == SUBREG)
17446 op1 = operands[2];
17447 op2 = operands[1];
17449 /* Optimize (__m128i) d | (__m128i) e and similar code,
17450 when d and e are float vectors, into a float vector logical
17451 insn. In C/C++ without using intrinsics there is no other way
17452 to express a vector logical operation on float vectors than
17453 to cast them temporarily to integer vectors. */
17454 if (op1
17455 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17456 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17457 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17458 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17459 && SUBREG_BYTE (op1) == 0
17460 && (GET_CODE (op2) == CONST_VECTOR
17461 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17462 && SUBREG_BYTE (op2) == 0))
17463 && can_create_pseudo_p ())
17465 rtx dst;
17466 switch (GET_MODE (SUBREG_REG (op1)))
17468 case V4SFmode:
17469 case V8SFmode:
17470 case V2DFmode:
17471 case V4DFmode:
17472 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17473 if (GET_CODE (op2) == CONST_VECTOR)
17475 op2 = gen_lowpart (GET_MODE (dst), op2);
17476 op2 = force_reg (GET_MODE (dst), op2);
17478 else
17480 op1 = operands[1];
17481 op2 = SUBREG_REG (operands[2]);
17482 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17483 op2 = force_reg (GET_MODE (dst), op2);
17485 op1 = SUBREG_REG (op1);
17486 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17487 op1 = force_reg (GET_MODE (dst), op1);
17488 emit_insn (gen_rtx_SET (VOIDmode, dst,
17489 gen_rtx_fmt_ee (code, GET_MODE (dst),
17490 op1, op2)));
17491 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17492 return;
17493 default:
17494 break;
17497 if (!nonimmediate_operand (operands[1], mode))
17498 operands[1] = force_reg (mode, operands[1]);
17499 if (!nonimmediate_operand (operands[2], mode))
17500 operands[2] = force_reg (mode, operands[2]);
17501 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17502 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17503 gen_rtx_fmt_ee (code, mode, operands[1],
17504 operands[2])));
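/* Example of the optimization above (illustrative): for
   (__m128i) a | (__m128i) b, where a and b are really __m128 values, the
   SUBREGs are peeled off and a V4SF logical insn ("orps" style) is
   emitted instead of "por", avoiding a bypass-domain penalty on some
   processors.  */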
17507 /* Return TRUE or FALSE depending on whether the binary operator meets the
17508 appropriate constraints. */
17510 bool
17511 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17512 rtx operands[3])
17514 rtx dst = operands[0];
17515 rtx src1 = operands[1];
17516 rtx src2 = operands[2];
17518 /* Both source operands cannot be in memory. */
17519 if (MEM_P (src1) && MEM_P (src2))
17520 return false;
17522 /* Canonicalize operand order for commutative operators. */
17523 if (ix86_swap_binary_operands_p (code, mode, operands))
17525 rtx temp = src1;
17526 src1 = src2;
17527 src2 = temp;
17530 /* If the destination is memory, we must have a matching source operand. */
17531 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17532 return false;
17534 /* Source 1 cannot be a constant. */
17535 if (CONSTANT_P (src1))
17536 return false;
17538 /* Source 1 cannot be a non-matching memory. */
17539 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17540 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17541 return (code == AND
17542 && (mode == HImode
17543 || mode == SImode
17544 || (TARGET_64BIT && mode == DImode))
17545 && satisfies_constraint_L (src2));
17547 return true;
17550 /* Attempt to expand a unary operator. Make the expansion closer to the
17551 actual machine, than just general_operand, which will allow 2 separate
17552 memory references (one output, one input) in a single insn. */
17554 void
17555 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17556 rtx operands[])
17558 int matching_memory;
17559 rtx src, dst, op, clob;
17561 dst = operands[0];
17562 src = operands[1];
17564 /* If the destination is memory, and we do not have matching source
17565 operands, do things in registers. */
17566 matching_memory = 0;
17567 if (MEM_P (dst))
17569 if (rtx_equal_p (dst, src))
17570 matching_memory = 1;
17571 else
17572 dst = gen_reg_rtx (mode);
17575 /* When source operand is memory, destination must match. */
17576 if (MEM_P (src) && !matching_memory)
17577 src = force_reg (mode, src);
17579 /* Emit the instruction. */
17581 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17582 if (reload_in_progress || code == NOT)
17584 /* Reload doesn't know about the flags register, and doesn't know that
17585 it doesn't want to clobber it. */
17586 gcc_assert (code == NOT);
17587 emit_insn (op);
17589 else
17591 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17592 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17595 /* Fix up the destination if needed. */
17596 if (dst != operands[0])
17597 emit_move_insn (operands[0], dst);
17600 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17601 divisor are within the range [0-255]. */
17603 void
17604 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17605 bool signed_p)
17607 rtx end_label, qimode_label;
17608 rtx insn, div, mod;
17609 rtx scratch, tmp0, tmp1, tmp2;
17610 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17611 rtx (*gen_zero_extend) (rtx, rtx);
17612 rtx (*gen_test_ccno_1) (rtx, rtx);
17614 switch (mode)
17616 case SImode:
17617 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17618 gen_test_ccno_1 = gen_testsi_ccno_1;
17619 gen_zero_extend = gen_zero_extendqisi2;
17620 break;
17621 case DImode:
17622 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17623 gen_test_ccno_1 = gen_testdi_ccno_1;
17624 gen_zero_extend = gen_zero_extendqidi2;
17625 break;
17626 default:
17627 gcc_unreachable ();
17630 end_label = gen_label_rtx ();
17631 qimode_label = gen_label_rtx ();
17633 scratch = gen_reg_rtx (mode);
17635 /* Use 8bit unsigned divmod if dividend and divisor are within
17636 the range [0-255]. */
17637 emit_move_insn (scratch, operands[2]);
17638 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17639 scratch, 1, OPTAB_DIRECT);
17640 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17641 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17642 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17643 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17644 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17645 pc_rtx);
17646 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17647 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17648 JUMP_LABEL (insn) = qimode_label;
17650 /* Generate original signed/unsigned divmod. */
17651 div = gen_divmod4_1 (operands[0], operands[1],
17652 operands[2], operands[3]);
17653 emit_insn (div);
17655 /* Branch to the end. */
17656 emit_jump_insn (gen_jump (end_label));
17657 emit_barrier ();
17659 /* Generate 8bit unsigned divide. */
17660 emit_label (qimode_label);
17661 /* Don't use operands[0] for result of 8bit divide since not all
17662 registers support QImode ZERO_EXTRACT. */
17663 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17664 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17665 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17666 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17668 if (signed_p)
17670 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17671 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17673 else
17675 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17676 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17679 /* Extract remainder from AH. */
17680 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17681 if (REG_P (operands[1]))
17682 insn = emit_move_insn (operands[1], tmp1);
17683 else
17685 /* Need a new scratch register since the old one holds the
17686 result of the 8bit divide. */
17687 scratch = gen_reg_rtx (mode);
17688 emit_move_insn (scratch, tmp1);
17689 insn = emit_move_insn (operands[1], scratch);
17691 set_unique_reg_note (insn, REG_EQUAL, mod);
17693 /* Zero extend quotient from AL. */
17694 tmp1 = gen_lowpart (QImode, tmp0);
17695 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17696 set_unique_reg_note (insn, REG_EQUAL, div);
17698 emit_label (end_label);
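/* The generated code has roughly this shape for a 32-bit unsigned
   division (assembly and registers are illustrative only):
       movl   %edi, %ecx
       orl    %esi, %ecx
       testl  $-256, %ecx       # dividend and divisor both in [0-255]?
       je     .L8bit
       <full 32-bit divl>
       jmp    .Ldone
   .L8bit:
       <divb>                   # quotient in the low byte, remainder
                                # in the high byte of the 16-bit result
   .Ldone:
*/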
17701 /* Whether it is OK to emit CFI directives when emitting asm code. */
17703 bool
17704 ix86_emit_cfi ()
17706 return dwarf2out_do_cfi_asm ();
17709 #define LEA_MAX_STALL (3)
17710 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17712 /* Increase given DISTANCE in half-cycles according to
17713 dependencies between PREV and NEXT instructions.
17714 Add 1 half-cycle if there is no dependency and
17715 go to the next cycle if there is some dependency. */
17717 static unsigned int
17718 increase_distance (rtx prev, rtx next, unsigned int distance)
17720 df_ref *use_rec;
17721 df_ref *def_rec;
17723 if (!prev || !next)
17724 return distance + (distance & 1) + 2;
17726 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17727 return distance + 1;
17729 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17730 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17731 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17732 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17733 return distance + (distance & 1) + 2;
17735 return distance + 1;
17738 /* Function checks if instruction INSN defines register number
17739 REGNO1 or REGNO2. */
17741 static bool
17742 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17743 rtx insn)
17745 df_ref *def_rec;
17747 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17748 if (DF_REF_REG_DEF_P (*def_rec)
17749 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17750 && (regno1 == DF_REF_REGNO (*def_rec)
17751 || regno2 == DF_REF_REGNO (*def_rec)))
17753 return true;
17756 return false;
17759 /* Function checks if instruction INSN uses register number
17760 REGNO as a part of address expression. */
17762 static bool
17763 insn_uses_reg_mem (unsigned int regno, rtx insn)
17765 df_ref *use_rec;
17767 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17768 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17769 return true;
17771 return false;
17774 /* Search backward for non-agu definition of register number REGNO1
17775 or register number REGNO2 in basic block starting from instruction
17776 START up to head of basic block or instruction INSN.
17778 Function puts true value into *FOUND var if definition was found
17779 and false otherwise.
17781 Distance in half-cycles between START and found instruction or head
17782 of BB is added to DISTANCE and returned. */
17784 static int
17785 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17786 rtx insn, int distance,
17787 rtx start, bool *found)
17789 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17790 rtx prev = start;
17791 rtx next = NULL;
17793 *found = false;
17795 while (prev
17796 && prev != insn
17797 && distance < LEA_SEARCH_THRESHOLD)
17799 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17801 distance = increase_distance (prev, next, distance);
17802 if (insn_defines_reg (regno1, regno2, prev))
17804 if (recog_memoized (prev) < 0
17805 || get_attr_type (prev) != TYPE_LEA)
17807 *found = true;
17808 return distance;
17812 next = prev;
17814 if (prev == BB_HEAD (bb))
17815 break;
17817 prev = PREV_INSN (prev);
17820 return distance;
17823 /* Search backward for non-agu definition of register number REGNO1
17824 or register number REGNO2 in INSN's basic block until
17825 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17826 2. Reach neighbour BBs boundary, or
17827 3. Reach agu definition.
17828 Returns the distance between the non-agu definition point and INSN.
17829 If no definition point, returns -1. */
17831 static int
17832 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17833 rtx insn)
17835 basic_block bb = BLOCK_FOR_INSN (insn);
17836 int distance = 0;
17837 bool found = false;
17839 if (insn != BB_HEAD (bb))
17840 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17841 distance, PREV_INSN (insn),
17842 &found);
17844 if (!found && distance < LEA_SEARCH_THRESHOLD)
17846 edge e;
17847 edge_iterator ei;
17848 bool simple_loop = false;
17850 FOR_EACH_EDGE (e, ei, bb->preds)
17851 if (e->src == bb)
17853 simple_loop = true;
17854 break;
17857 if (simple_loop)
17858 distance = distance_non_agu_define_in_bb (regno1, regno2,
17859 insn, distance,
17860 BB_END (bb), &found);
17861 else
17863 int shortest_dist = -1;
17864 bool found_in_bb = false;
17866 FOR_EACH_EDGE (e, ei, bb->preds)
17868 int bb_dist
17869 = distance_non_agu_define_in_bb (regno1, regno2,
17870 insn, distance,
17871 BB_END (e->src),
17872 &found_in_bb);
17873 if (found_in_bb)
17875 if (shortest_dist < 0)
17876 shortest_dist = bb_dist;
17877 else if (bb_dist > 0)
17878 shortest_dist = MIN (bb_dist, shortest_dist);
17880 found = true;
17884 distance = shortest_dist;
17888 /* get_attr_type may modify recog data. We want to make sure
17889 that recog data is valid for instruction INSN, on which
17890 distance_non_agu_define is called. INSN is unchanged here. */
17891 extract_insn_cached (insn);
17893 if (!found)
17894 return -1;
17896 return distance >> 1;
17899 /* Return the distance in half-cycles between INSN and the next
17900 insn that uses register number REGNO in memory address added
17901 to DISTANCE. Return -1 if REGNO is set.
17903 Put true value into *FOUND if register usage was found and
17904 false otherwise.
17905 Put true value into *REDEFINED if register redefinition was
17906 found and false otherwise. */
17908 static int
17909 distance_agu_use_in_bb (unsigned int regno,
17910 rtx insn, int distance, rtx start,
17911 bool *found, bool *redefined)
17913 basic_block bb = NULL;
17914 rtx next = start;
17915 rtx prev = NULL;
17917 *found = false;
17918 *redefined = false;
17920 if (start != NULL_RTX)
17922 bb = BLOCK_FOR_INSN (start);
17923 if (start != BB_HEAD (bb))
17924 /* If insn and start belong to the same bb, set prev to insn,
17925 so the call to increase_distance will increase the distance
17926 between insns by 1. */
17927 prev = insn;
17930 while (next
17931 && next != insn
17932 && distance < LEA_SEARCH_THRESHOLD)
17934 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17936 distance = increase_distance(prev, next, distance);
17937 if (insn_uses_reg_mem (regno, next))
17939 /* Return DISTANCE if OP0 is used in memory
17940 address in NEXT. */
17941 *found = true;
17942 return distance;
17945 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17947 /* Return -1 if OP0 is set in NEXT. */
17948 *redefined = true;
17949 return -1;
17952 prev = next;
17955 if (next == BB_END (bb))
17956 break;
17958 next = NEXT_INSN (next);
17961 return distance;
17964 /* Return the distance between INSN and the next insn that uses
17965 register number REGNO0 in a memory address. Return -1 if no such
17966 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17968 static int
17969 distance_agu_use (unsigned int regno0, rtx insn)
17971 basic_block bb = BLOCK_FOR_INSN (insn);
17972 int distance = 0;
17973 bool found = false;
17974 bool redefined = false;
17976 if (insn != BB_END (bb))
17977 distance = distance_agu_use_in_bb (regno0, insn, distance,
17978 NEXT_INSN (insn),
17979 &found, &redefined);
17981 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17983 edge e;
17984 edge_iterator ei;
17985 bool simple_loop = false;
17987 FOR_EACH_EDGE (e, ei, bb->succs)
17988 if (e->dest == bb)
17990 simple_loop = true;
17991 break;
17994 if (simple_loop)
17995 distance = distance_agu_use_in_bb (regno0, insn,
17996 distance, BB_HEAD (bb),
17997 &found, &redefined);
17998 else
18000 int shortest_dist = -1;
18001 bool found_in_bb = false;
18002 bool redefined_in_bb = false;
18004 FOR_EACH_EDGE (e, ei, bb->succs)
18006 int bb_dist
18007 = distance_agu_use_in_bb (regno0, insn,
18008 distance, BB_HEAD (e->dest),
18009 &found_in_bb, &redefined_in_bb);
18010 if (found_in_bb)
18012 if (shortest_dist < 0)
18013 shortest_dist = bb_dist;
18014 else if (bb_dist > 0)
18015 shortest_dist = MIN (bb_dist, shortest_dist);
18017 found = true;
18021 distance = shortest_dist;
18025 if (!found || redefined)
18026 return -1;
18028 return distance >> 1;
18031 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18032 there is a dilemma of choosing LEA or ADD.
18033 Negative value: ADD is preferred over LEA
18034 Zero: Neutral
18035 Positive value: LEA is preferred over ADD. */
18036 #define IX86_LEA_PRIORITY 0
18038 /* Return true if use of lea INSN has a performance advantage
18039 over a sequence of instructions. The instruction sequence has
18040 SPLIT_COST cycles higher latency than the lea. */
18042 static bool
18043 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18044 unsigned int regno2, int split_cost, bool has_scale)
18046 int dist_define, dist_use;
18048 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18049 non-destructive destination, or because the ability to use
18050 SCALE is wanted, the use of LEA is justified. */
18051 if (TARGET_SILVERMONT || TARGET_INTEL)
18053 if (has_scale)
18054 return true;
18055 if (split_cost < 1)
18056 return false;
18057 if (regno0 == regno1 || regno0 == regno2)
18058 return false;
18059 return true;
18062 dist_define = distance_non_agu_define (regno1, regno2, insn);
18063 dist_use = distance_agu_use (regno0, insn);
18065 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18067 /* If there is no non-AGU operand definition, no AGU
18068 operand usage and the split cost is 0, then both the lea
18069 and non-lea variants have the same priority. Currently
18070 we prefer lea for 64-bit code and non-lea for 32-bit
18071 code. */
18072 if (dist_use < 0 && split_cost == 0)
18073 return TARGET_64BIT || IX86_LEA_PRIORITY;
18074 else
18075 return true;
18078 /* With a longer definition distance, lea is preferable.
18079 Here we adjust it to take into account the splitting cost and
18080 lea priority. */
18081 dist_define += split_cost + IX86_LEA_PRIORITY;
18083 /* If there is no use in a memory address then we just check
18084 that the split cost exceeds the AGU stall. */
18085 if (dist_use < 0)
18086 return dist_define > LEA_MAX_STALL;
18088 /* If this insn has both backward non-agu dependence and forward
18089 agu dependence, the one with the shorter distance takes effect. */
18090 return dist_define >= dist_use;
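/* Worked example (illustrative): with dist_define = 1, dist_use = 2 and
   IX86_LEA_PRIORITY = 0, a split_cost of 1 gives 1 + 1 >= 2, so true is
   returned and the lea is preferred; a split_cost of 0 gives 1 >= 2,
   which fails, so the split sequence is preferred.  */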
18093 /* Return true if it is legal to clobber flags by INSN and
18094 false otherwise. */
18096 static bool
18097 ix86_ok_to_clobber_flags (rtx insn)
18099 basic_block bb = BLOCK_FOR_INSN (insn);
18100 df_ref *use;
18101 bitmap live;
18103 while (insn)
18105 if (NONDEBUG_INSN_P (insn))
18107 for (use = DF_INSN_USES (insn); *use; use++)
18108 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18109 return false;
18111 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18112 return true;
18115 if (insn == BB_END (bb))
18116 break;
18118 insn = NEXT_INSN (insn);
18121 live = df_get_live_out(bb);
18122 return !REGNO_REG_SET_P (live, FLAGS_REG);
18125 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18126 move and add to avoid AGU stalls. */
18128 bool
18129 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18131 unsigned int regno0, regno1, regno2;
18133 /* Check if we need to optimize. */
18134 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18135 return false;
18137 /* Check it is correct to split here. */
18138 if (!ix86_ok_to_clobber_flags(insn))
18139 return false;
18141 regno0 = true_regnum (operands[0]);
18142 regno1 = true_regnum (operands[1]);
18143 regno2 = true_regnum (operands[2]);
18145 /* We need to split only adds with a non-destructive
18146 destination operand. */
18147 if (regno0 == regno1 || regno0 == regno2)
18148 return false;
18149 else
18150 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18153 /* Return true if we should emit lea instruction instead of mov
18154 instruction. */
18156 bool
18157 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18159 unsigned int regno0, regno1;
18161 /* Check if we need to optimize. */
18162 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18163 return false;
18165 /* Use lea for reg to reg moves only. */
18166 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18167 return false;
18169 regno0 = true_regnum (operands[0]);
18170 regno1 = true_regnum (operands[1]);
18172 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18175 /* Return true if we need to split lea into a sequence of
18176 instructions to avoid AGU stalls. */
18178 bool
18179 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18181 unsigned int regno0, regno1, regno2;
18182 int split_cost;
18183 struct ix86_address parts;
18184 int ok;
18186 /* Check we need to optimize. */
18187 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18188 return false;
18190 /* The "at least two components" test below might not catch simple
18191 move or zero extension insns if parts.base is non-NULL and parts.disp
18192 is const0_rtx as the only components in the address, e.g. if the
18193 register is %rbp or %r13. As this test is much cheaper and moves or
18194 zero extensions are the common case, do this check first. */
18195 if (REG_P (operands[1])
18196 || (SImode_address_operand (operands[1], VOIDmode)
18197 && REG_P (XEXP (operands[1], 0))))
18198 return false;
18200 /* Check if it is OK to split here. */
18201 if (!ix86_ok_to_clobber_flags (insn))
18202 return false;
18204 ok = ix86_decompose_address (operands[1], &parts);
18205 gcc_assert (ok);
18207 /* There should be at least two components in the address. */
18208 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18209 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18210 return false;
18212 /* We should not split into add if a non-legitimate PIC
18213 operand is used as the displacement. */
18214 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18215 return false;
18217 regno0 = true_regnum (operands[0]);
18218 regno1 = INVALID_REGNUM;
18219 regno2 = INVALID_REGNUM;
18221 if (parts.base)
18222 regno1 = true_regnum (parts.base);
18223 if (parts.index)
18224 regno2 = true_regnum (parts.index);
18226 split_cost = 0;
18228 /* Compute how many cycles we will add to the execution time
18229 if we split the lea into a sequence of instructions. */
18230 if (parts.base || parts.index)
18232 /* Have to use a mov instruction if the non-destructive
18233 destination form is used. */
18234 if (regno1 != regno0 && regno2 != regno0)
18235 split_cost += 1;
18237 /* Have to add index to base if both exist. */
18238 if (parts.base && parts.index)
18239 split_cost += 1;
18241 /* Have to use shift and adds if scale is 2 or greater. */
18242 if (parts.scale > 1)
18244 if (regno0 != regno1)
18245 split_cost += 1;
18246 else if (regno2 == regno0)
18247 split_cost += 4;
18248 else
18249 split_cost += parts.scale;
18252 /* Have to use add instruction with immediate if
18253 disp is non zero. */
18254 if (parts.disp && parts.disp != const0_rtx)
18255 split_cost += 1;
18257 /* Subtract the price of lea. */
18258 split_cost -= 1;
18261 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18262 parts.scale > 1);
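/* Example split-cost computation (illustrative): for
   "leal 4(%ebx,%ecx,2), %eax", with the destination distinct from both
   sources, the cost is 1 (mov) + 1 (add of base and index) + 1 (shift
   for scale 2) + 1 (add of the displacement) - 1 (the lea saved) = 3.  */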
18265 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18266 matches destination. RTX includes clobber of FLAGS_REG. */
18268 static void
18269 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18270 rtx dst, rtx src)
18272 rtx op, clob;
18274 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18275 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18277 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18280 /* Return true if the definition of REGNO1 is nearer to the insn than the definition of REGNO2. */
18282 static bool
18283 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18285 rtx prev = insn;
18286 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18288 if (insn == start)
18289 return false;
18290 while (prev && prev != start)
18292 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18294 prev = PREV_INSN (prev);
18295 continue;
18297 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18298 return true;
18299 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18300 return false;
18301 prev = PREV_INSN (prev);
18304 /* None of the regs is defined in the bb. */
18305 return false;
18308 /* Split lea instructions into a sequence of instructions
18309 which are executed on ALU to avoid AGU stalls.
18310 It is assumed that it is allowed to clobber flags register
18311 at lea position. */
18313 void
18314 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18316 unsigned int regno0, regno1, regno2;
18317 struct ix86_address parts;
18318 rtx target, tmp;
18319 int ok, adds;
18321 ok = ix86_decompose_address (operands[1], &parts);
18322 gcc_assert (ok);
18324 target = gen_lowpart (mode, operands[0]);
18326 regno0 = true_regnum (target);
18327 regno1 = INVALID_REGNUM;
18328 regno2 = INVALID_REGNUM;
18330 if (parts.base)
18332 parts.base = gen_lowpart (mode, parts.base);
18333 regno1 = true_regnum (parts.base);
18336 if (parts.index)
18338 parts.index = gen_lowpart (mode, parts.index);
18339 regno2 = true_regnum (parts.index);
18342 if (parts.disp)
18343 parts.disp = gen_lowpart (mode, parts.disp);
18345 if (parts.scale > 1)
18347 /* Case r1 = r1 + ... */
18348 if (regno1 == regno0)
18350 /* If we have a case r1 = r1 + C * r2 then we
18351 should use multiplication, which is very
18352 expensive. Assume the cost model is wrong if we
18353 have such a case here. */
18354 gcc_assert (regno2 != regno0);
18356 for (adds = parts.scale; adds > 0; adds--)
18357 ix86_emit_binop (PLUS, mode, target, parts.index);
18359 else
18361 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18362 if (regno0 != regno2)
18363 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18365 /* Use shift for scaling. */
18366 ix86_emit_binop (ASHIFT, mode, target,
18367 GEN_INT (exact_log2 (parts.scale)));
18369 if (parts.base)
18370 ix86_emit_binop (PLUS, mode, target, parts.base);
18372 if (parts.disp && parts.disp != const0_rtx)
18373 ix86_emit_binop (PLUS, mode, target, parts.disp);
18376 else if (!parts.base && !parts.index)
18378 gcc_assert(parts.disp);
18379 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18381 else
18383 if (!parts.base)
18385 if (regno0 != regno2)
18386 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18388 else if (!parts.index)
18390 if (regno0 != regno1)
18391 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18393 else
18395 if (regno0 == regno1)
18396 tmp = parts.index;
18397 else if (regno0 == regno2)
18398 tmp = parts.base;
18399 else
18401 rtx tmp1;
18403 /* Find better operand for SET instruction, depending
18404 on which definition is farther from the insn. */
18405 if (find_nearest_reg_def (insn, regno1, regno2))
18406 tmp = parts.index, tmp1 = parts.base;
18407 else
18408 tmp = parts.base, tmp1 = parts.index;
18410 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18412 if (parts.disp && parts.disp != const0_rtx)
18413 ix86_emit_binop (PLUS, mode, target, parts.disp);
18415 ix86_emit_binop (PLUS, mode, target, tmp1);
18416 return;
18419 ix86_emit_binop (PLUS, mode, target, tmp);
18422 if (parts.disp && parts.disp != const0_rtx)
18423 ix86_emit_binop (PLUS, mode, target, parts.disp);
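/* For instance (illustrative), "leaq 4(%rbx,%rcx,2), %rax" with a
   destination distinct from both sources is split roughly into
       movq %rcx, %rax
       salq $1, %rax
       addq %rbx, %rax
       addq $4, %rax
   all of which execute on the ALU rather than the AGU.  */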
18427 /* Return true if it is ok to optimize an ADD operation to an LEA
18428 operation to avoid flag register consumption. For most processors,
18429 ADD is faster than LEA. For processors like BONNELL, if the
18430 destination register of the LEA holds an actual address which will be
18431 used soon, LEA is better; otherwise ADD is better. */
18433 bool
18434 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18436 unsigned int regno0 = true_regnum (operands[0]);
18437 unsigned int regno1 = true_regnum (operands[1]);
18438 unsigned int regno2 = true_regnum (operands[2]);
18440 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18441 if (regno0 != regno1 && regno0 != regno2)
18442 return true;
18444 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18445 return false;
18447 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18450 /* Return true if destination reg of SET_BODY is shift count of
18451 USE_BODY. */
18453 static bool
18454 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18456 rtx set_dest;
18457 rtx shift_rtx;
18458 int i;
18460 /* Retrieve destination of SET_BODY. */
18461 switch (GET_CODE (set_body))
18463 case SET:
18464 set_dest = SET_DEST (set_body);
18465 if (!set_dest || !REG_P (set_dest))
18466 return false;
18467 break;
18468 case PARALLEL:
18469 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18470 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18471 use_body))
18472 return true;
18473 default:
18474 return false;
18475 break;
18478 /* Retrieve shift count of USE_BODY. */
18479 switch (GET_CODE (use_body))
18481 case SET:
18482 shift_rtx = XEXP (use_body, 1);
18483 break;
18484 case PARALLEL:
18485 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18486 if (ix86_dep_by_shift_count_body (set_body,
18487 XVECEXP (use_body, 0, i)))
18488 return true;
18489 default:
18490 return false;
18491 break;
18494 if (shift_rtx
18495 && (GET_CODE (shift_rtx) == ASHIFT
18496 || GET_CODE (shift_rtx) == LSHIFTRT
18497 || GET_CODE (shift_rtx) == ASHIFTRT
18498 || GET_CODE (shift_rtx) == ROTATE
18499 || GET_CODE (shift_rtx) == ROTATERT))
18501 rtx shift_count = XEXP (shift_rtx, 1);
18503 /* Return true if shift count is dest of SET_BODY. */
18504 if (REG_P (shift_count))
18506 /* Add check since it can be invoked before register
18507 allocation in pre-reload schedule. */
18508 if (reload_completed
18509 && true_regnum (set_dest) == true_regnum (shift_count))
18510 return true;
18511 else if (REGNO(set_dest) == REGNO(shift_count))
18512 return true;
18516 return false;
18519 /* Return true if destination reg of SET_INSN is shift count of
18520 USE_INSN. */
18522 bool
18523 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18525 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18526 PATTERN (use_insn));
18529 /* Return TRUE or FALSE depending on whether the unary operator meets the
18530 appropriate constraints. */
18532 bool
18533 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18534 enum machine_mode mode ATTRIBUTE_UNUSED,
18535 rtx operands[2])
18537 /* If one of operands is memory, source and destination must match. */
18538 if ((MEM_P (operands[0])
18539 || MEM_P (operands[1]))
18540 && ! rtx_equal_p (operands[0], operands[1]))
18541 return false;
18542 return true;
18545 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18546 are ok, keeping in mind the possible movddup alternative. */
18548 bool
18549 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18551 if (MEM_P (operands[0]))
18552 return rtx_equal_p (operands[0], operands[1 + high]);
18553 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18554 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18555 return true;
18558 /* Post-reload splitter for converting an SF or DFmode value in an
18559 SSE register into an unsigned SImode. */
18561 void
18562 ix86_split_convert_uns_si_sse (rtx operands[])
18564 enum machine_mode vecmode;
18565 rtx value, large, zero_or_two31, input, two31, x;
18567 large = operands[1];
18568 zero_or_two31 = operands[2];
18569 input = operands[3];
18570 two31 = operands[4];
18571 vecmode = GET_MODE (large);
18572 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18574 /* Load up the value into the low element. We must ensure that the other
18575 elements are valid floats -- zero is the easiest such value. */
18576 if (MEM_P (input))
18578 if (vecmode == V4SFmode)
18579 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18580 else
18581 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18583 else
18585 input = gen_rtx_REG (vecmode, REGNO (input));
18586 emit_move_insn (value, CONST0_RTX (vecmode));
18587 if (vecmode == V4SFmode)
18588 emit_insn (gen_sse_movss (value, value, input));
18589 else
18590 emit_insn (gen_sse2_movsd (value, value, input));
18593 emit_move_insn (large, two31);
18594 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18596 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18597 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18599 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18600 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18602 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18603 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18605 large = gen_rtx_REG (V4SImode, REGNO (large));
18606 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18608 x = gen_rtx_REG (V4SImode, REGNO (value));
18609 if (vecmode == V4SFmode)
18610 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18611 else
18612 emit_insn (gen_sse2_cvttpd2dq (x, value));
18613 value = x;
18615 emit_insn (gen_xorv4si3 (value, value, large));
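/* Numeric sketch of the sequence above (illustrative): for an input of
   3000000000.0, LARGE records that 2**31 <= value, so 2**31 is
   subtracted, giving 852516352.0; the signed truncation yields
   852516352 and the final xor with the mask shifted into bit 31 adds
   0x80000000 back, producing 3000000000 as an unsigned SImode value.  */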
18618 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18619 Expects the 64-bit DImode to be supplied in a pair of integral
18620 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18621 -mfpmath=sse, !optimize_size only. */
18623 void
18624 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18626 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18627 rtx int_xmm, fp_xmm;
18628 rtx biases, exponents;
18629 rtx x;
18631 int_xmm = gen_reg_rtx (V4SImode);
18632 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18633 emit_insn (gen_movdi_to_sse (int_xmm, input));
18634 else if (TARGET_SSE_SPLIT_REGS)
18636 emit_clobber (int_xmm);
18637 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18639 else
18641 x = gen_reg_rtx (V2DImode);
18642 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18643 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18646 x = gen_rtx_CONST_VECTOR (V4SImode,
18647 gen_rtvec (4, GEN_INT (0x43300000UL),
18648 GEN_INT (0x45300000UL),
18649 const0_rtx, const0_rtx));
18650 exponents = validize_mem (force_const_mem (V4SImode, x));
18652 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18653 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18655 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18656 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18657 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18658 (0x1.0p84 + double(fp_value_hi_xmm)).
18659 Note these exponents differ by 32. */
18661 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18663 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18664 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18665 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18666 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18667 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18668 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18669 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18670 biases = validize_mem (force_const_mem (V2DFmode, biases));
18671 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18673 /* Add the upper and lower DFmode values together. */
18674 if (TARGET_SSE3)
18675 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18676 else
18678 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18679 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18680 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18683 ix86_expand_vector_extract (false, target, fp_xmm, 0);
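/* Illustrative sketch (not part of this file): the exponent-splicing trick
   above written as scalar C.  The low and high 32-bit halves are glued
   under the exponents of 0x1.0p52 and 0x1.0p84, the biases are subtracted
   exactly, and one final addition performs the only rounding.  Hypothetical
   helper; assumes IEEE-754 doubles.  */

static double
uns_didf_sketch (unsigned long long x)
{
  union { double d; unsigned long long u; } lo, hi;

  lo.u = 0x4330000000000000ULL | (x & 0xffffffffULL); /* 0x1.0p52 + low32 */
  hi.u = 0x4530000000000000ULL | (x >> 32);           /* 0x1.0p84 + high32 * 2^32 */

  return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
}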
18686 /* Not used, but eases macroization of patterns. */
18687 void
18688 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18689 rtx input ATTRIBUTE_UNUSED)
18691 gcc_unreachable ();
18694 /* Convert an unsigned SImode value into a DFmode. Only currently used
18695 for SSE, but applicable anywhere. */
18697 void
18698 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18700 REAL_VALUE_TYPE TWO31r;
18701 rtx x, fp;
18703 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18704 NULL, 1, OPTAB_DIRECT);
18706 fp = gen_reg_rtx (DFmode);
18707 emit_insn (gen_floatsidf2 (fp, x));
18709 real_ldexp (&TWO31r, &dconst1, 31);
18710 x = const_double_from_real_value (TWO31r, DFmode);
18712 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18713 if (x != target)
18714 emit_move_insn (target, x);
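/* Illustrative sketch (not part of this file): the unsigned SImode case is
   handled by wrapping the value into the signed range, converting, and then
   adding 2^31 back in the FP domain, exactly as the PLUS/floatsidf2/PLUS
   sequence above does.  The helper name is hypothetical.  */

static double
uns_sidf_sketch (unsigned int x)
{
  int wrapped = (int) (x - 0x80000000u); /* x + INT_MIN, wrapped (GCC wraps modulo 2^32) */
  return (double) wrapped + 2147483648.0;
}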
18717 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18718 32-bit mode; otherwise we have a direct convert instruction. */
18720 void
18721 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18723 REAL_VALUE_TYPE TWO32r;
18724 rtx fp_lo, fp_hi, x;
18726 fp_lo = gen_reg_rtx (DFmode);
18727 fp_hi = gen_reg_rtx (DFmode);
18729 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18731 real_ldexp (&TWO32r, &dconst1, 32);
18732 x = const_double_from_real_value (TWO32r, DFmode);
18733 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18735 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18737 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18738 0, OPTAB_DIRECT);
18739 if (x != target)
18740 emit_move_insn (target, x);
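/* Illustrative sketch (not part of this file): the signed DImode case splits
   the value into a signed high word scaled by 2^32 plus an unsigned low
   word, mirroring the MULT/PLUS sequence above.  Hypothetical helper;
   assumes the usual arithmetic right shift for negative values.  */

static double
sign_didf_sketch (long long x)
{
  int hi = (int) (x >> 32);            /* carries the sign */
  unsigned int lo = (unsigned int) x;  /* always treated as unsigned */

  return (double) hi * 0x1.0p32 + (double) lo;
}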
18743 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18744 For x86_32, -mfpmath=sse, !optimize_size only. */
18745 void
18746 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18748 REAL_VALUE_TYPE ONE16r;
18749 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18751 real_ldexp (&ONE16r, &dconst1, 16);
18752 x = const_double_from_real_value (ONE16r, SFmode);
18753 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18754 NULL, 0, OPTAB_DIRECT);
18755 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18756 NULL, 0, OPTAB_DIRECT);
18757 fp_hi = gen_reg_rtx (SFmode);
18758 fp_lo = gen_reg_rtx (SFmode);
18759 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18760 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18761 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18762 0, OPTAB_DIRECT);
18763 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18764 0, OPTAB_DIRECT);
18765 if (!rtx_equal_p (target, fp_hi))
18766 emit_move_insn (target, fp_hi);
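/* Illustrative sketch (not part of this file): for SFmode the value is split
   into two 16-bit halves, each of which converts exactly through the signed
   path, and recombined as hi * 2^16 + lo with a single rounding, as in the
   AND/LSHIFTRT/floatsisf2 sequence above.  The helper name is hypothetical.  */

static float
uns_sisf_sketch (unsigned int x)
{
  int lo = (int) (x & 0xffff);
  int hi = (int) (x >> 16);

  return (float) hi * 65536.0f + (float) lo;
}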
18769 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18770 a vector of unsigned ints VAL to vector of floats TARGET. */
18772 void
18773 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18775 rtx tmp[8];
18776 REAL_VALUE_TYPE TWO16r;
18777 enum machine_mode intmode = GET_MODE (val);
18778 enum machine_mode fltmode = GET_MODE (target);
18779 rtx (*cvt) (rtx, rtx);
18781 if (intmode == V4SImode)
18782 cvt = gen_floatv4siv4sf2;
18783 else
18784 cvt = gen_floatv8siv8sf2;
18785 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18786 tmp[0] = force_reg (intmode, tmp[0]);
18787 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18788 OPTAB_DIRECT);
18789 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18790 NULL_RTX, 1, OPTAB_DIRECT);
18791 tmp[3] = gen_reg_rtx (fltmode);
18792 emit_insn (cvt (tmp[3], tmp[1]));
18793 tmp[4] = gen_reg_rtx (fltmode);
18794 emit_insn (cvt (tmp[4], tmp[2]));
18795 real_ldexp (&TWO16r, &dconst1, 16);
18796 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18797 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18798 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18799 OPTAB_DIRECT);
18800 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18801 OPTAB_DIRECT);
18802 if (tmp[7] != target)
18803 emit_move_insn (target, tmp[7]);
18806 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18807 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18808 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18809 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18812 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18814 REAL_VALUE_TYPE TWO31r;
18815 rtx two31r, tmp[4];
18816 enum machine_mode mode = GET_MODE (val);
18817 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18818 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18819 rtx (*cmp) (rtx, rtx, rtx, rtx);
18820 int i;
18822 for (i = 0; i < 3; i++)
18823 tmp[i] = gen_reg_rtx (mode);
18824 real_ldexp (&TWO31r, &dconst1, 31);
18825 two31r = const_double_from_real_value (TWO31r, scalarmode);
18826 two31r = ix86_build_const_vector (mode, 1, two31r);
18827 two31r = force_reg (mode, two31r);
18828 switch (mode)
18830 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18831 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18832 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18833 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18834 default: gcc_unreachable ();
18836 tmp[3] = gen_rtx_LE (mode, two31r, val);
18837 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18838 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18839 0, OPTAB_DIRECT);
18840 if (intmode == V4SImode || TARGET_AVX2)
18841 *xorp = expand_simple_binop (intmode, ASHIFT,
18842 gen_lowpart (intmode, tmp[0]),
18843 GEN_INT (31), NULL_RTX, 0,
18844 OPTAB_DIRECT);
18845 else
18847 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18848 two31 = ix86_build_const_vector (intmode, 1, two31);
18849 *xorp = expand_simple_binop (intmode, AND,
18850 gen_lowpart (intmode, tmp[0]),
18851 two31, NULL_RTX, 0,
18852 OPTAB_DIRECT);
18854 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18855 0, OPTAB_DIRECT);
18858 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18859 then replicate the value for all elements of the vector
18860 register. */
18863 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18865 int i, n_elt;
18866 rtvec v;
18867 enum machine_mode scalar_mode;
18869 switch (mode)
18871 case V64QImode:
18872 case V32QImode:
18873 case V16QImode:
18874 case V32HImode:
18875 case V16HImode:
18876 case V8HImode:
18877 case V16SImode:
18878 case V8SImode:
18879 case V4SImode:
18880 case V8DImode:
18881 case V4DImode:
18882 case V2DImode:
18883 gcc_assert (vect);
18884 case V16SFmode:
18885 case V8SFmode:
18886 case V4SFmode:
18887 case V8DFmode:
18888 case V4DFmode:
18889 case V2DFmode:
18890 n_elt = GET_MODE_NUNITS (mode);
18891 v = rtvec_alloc (n_elt);
18892 scalar_mode = GET_MODE_INNER (mode);
18894 RTVEC_ELT (v, 0) = value;
18896 for (i = 1; i < n_elt; ++i)
18897 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18899 return gen_rtx_CONST_VECTOR (mode, v);
18901 default:
18902 gcc_unreachable ();
18906 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18907 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18908 for an SSE register. If VECT is true, then replicate the mask for
18909 all elements of the vector register. If INVERT is true, then create
18910 a mask excluding the sign bit. */
18913 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18915 enum machine_mode vec_mode, imode;
18916 HOST_WIDE_INT hi, lo;
18917 int shift = 63;
18918 rtx v;
18919 rtx mask;
18921 /* Find the sign bit, sign extended to 2*HWI. */
18922 switch (mode)
18924 case V16SImode:
18925 case V16SFmode:
18926 case V8SImode:
18927 case V4SImode:
18928 case V8SFmode:
18929 case V4SFmode:
18930 vec_mode = mode;
18931 mode = GET_MODE_INNER (mode);
18932 imode = SImode;
18933 lo = 0x80000000, hi = lo < 0;
18934 break;
18936 case V8DImode:
18937 case V4DImode:
18938 case V2DImode:
18939 case V8DFmode:
18940 case V4DFmode:
18941 case V2DFmode:
18942 vec_mode = mode;
18943 mode = GET_MODE_INNER (mode);
18944 imode = DImode;
18945 if (HOST_BITS_PER_WIDE_INT >= 64)
18946 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18947 else
18948 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18949 break;
18951 case TImode:
18952 case TFmode:
18953 vec_mode = VOIDmode;
18954 if (HOST_BITS_PER_WIDE_INT >= 64)
18956 imode = TImode;
18957 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18959 else
18961 rtvec vec;
18963 imode = DImode;
18964 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18966 if (invert)
18968 lo = ~lo, hi = ~hi;
18969 v = constm1_rtx;
18971 else
18972 v = const0_rtx;
18974 mask = immed_double_const (lo, hi, imode);
18976 vec = gen_rtvec (2, v, mask);
18977 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18978 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18980 return v;
18982 break;
18984 default:
18985 gcc_unreachable ();
18988 if (invert)
18989 lo = ~lo, hi = ~hi;
18991 /* Force this value into the low part of a fp vector constant. */
18992 mask = immed_double_const (lo, hi, imode);
18993 mask = gen_lowpart (mode, mask);
18995 if (vec_mode == VOIDmode)
18996 return force_reg (mode, mask);
18998 v = ix86_build_const_vector (vec_mode, vect, mask);
18999 return force_reg (vec_mode, v);
19002 /* Generate code for floating point ABS or NEG. */
19004 void
19005 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19006 rtx operands[])
19008 rtx mask, set, dst, src;
19009 bool use_sse = false;
19010 bool vector_mode = VECTOR_MODE_P (mode);
19011 enum machine_mode vmode = mode;
19013 if (vector_mode)
19014 use_sse = true;
19015 else if (mode == TFmode)
19016 use_sse = true;
19017 else if (TARGET_SSE_MATH)
19019 use_sse = SSE_FLOAT_MODE_P (mode);
19020 if (mode == SFmode)
19021 vmode = V4SFmode;
19022 else if (mode == DFmode)
19023 vmode = V2DFmode;
19026 /* NEG and ABS performed with SSE use bitwise mask operations.
19027 Create the appropriate mask now. */
19028 if (use_sse)
19029 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19030 else
19031 mask = NULL_RTX;
19033 dst = operands[0];
19034 src = operands[1];
19036 set = gen_rtx_fmt_e (code, mode, src);
19037 set = gen_rtx_SET (VOIDmode, dst, set);
19039 if (mask)
19041 rtx use, clob;
19042 rtvec par;
19044 use = gen_rtx_USE (VOIDmode, mask);
19045 if (vector_mode)
19046 par = gen_rtvec (2, set, use);
19047 else
19049 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19050 par = gen_rtvec (3, set, use, clob);
19052 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19054 else
19055 emit_insn (set);
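/* Illustrative sketch (not part of this file): with SSE, ABS and NEG reduce
   to one bitwise operation against the mask built by
   ix86_build_signbit_mask -- AND with the inverted mask clears the sign
   bit, XOR with the plain mask flips it.  Hypothetical helper; assumes
   IEEE-754 doubles.  */

static double
absneg_sketch (double x, int want_abs)
{
  union { double d; unsigned long long u; } v;
  unsigned long long signbit = 0x8000000000000000ULL;

  v.d = x;
  if (want_abs)
    v.u &= ~signbit;  /* ABS: mask built with invert == true */
  else
    v.u ^= signbit;   /* NEG: mask built with invert == false */
  return v.d;
}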
19058 /* Expand a copysign operation. Special case operand 0 being a constant. */
19060 void
19061 ix86_expand_copysign (rtx operands[])
19063 enum machine_mode mode, vmode;
19064 rtx dest, op0, op1, mask, nmask;
19066 dest = operands[0];
19067 op0 = operands[1];
19068 op1 = operands[2];
19070 mode = GET_MODE (dest);
19072 if (mode == SFmode)
19073 vmode = V4SFmode;
19074 else if (mode == DFmode)
19075 vmode = V2DFmode;
19076 else
19077 vmode = mode;
19079 if (GET_CODE (op0) == CONST_DOUBLE)
19081 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19083 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19084 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19086 if (mode == SFmode || mode == DFmode)
19088 if (op0 == CONST0_RTX (mode))
19089 op0 = CONST0_RTX (vmode);
19090 else
19092 rtx v = ix86_build_const_vector (vmode, false, op0);
19094 op0 = force_reg (vmode, v);
19097 else if (op0 != CONST0_RTX (mode))
19098 op0 = force_reg (mode, op0);
19100 mask = ix86_build_signbit_mask (vmode, 0, 0);
19102 if (mode == SFmode)
19103 copysign_insn = gen_copysignsf3_const;
19104 else if (mode == DFmode)
19105 copysign_insn = gen_copysigndf3_const;
19106 else
19107 copysign_insn = gen_copysigntf3_const;
19109 emit_insn (copysign_insn (dest, op0, op1, mask));
19111 else
19113 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19115 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19116 mask = ix86_build_signbit_mask (vmode, 0, 0);
19118 if (mode == SFmode)
19119 copysign_insn = gen_copysignsf3_var;
19120 else if (mode == DFmode)
19121 copysign_insn = gen_copysigndf3_var;
19122 else
19123 copysign_insn = gen_copysigntf3_var;
19125 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19129 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19130 be a constant, and so has already been expanded into a vector constant. */
19132 void
19133 ix86_split_copysign_const (rtx operands[])
19135 enum machine_mode mode, vmode;
19136 rtx dest, op0, mask, x;
19138 dest = operands[0];
19139 op0 = operands[1];
19140 mask = operands[3];
19142 mode = GET_MODE (dest);
19143 vmode = GET_MODE (mask);
19145 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19146 x = gen_rtx_AND (vmode, dest, mask);
19147 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19149 if (op0 != CONST0_RTX (vmode))
19151 x = gen_rtx_IOR (vmode, dest, op0);
19152 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19156 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19157 so we have to do two masks. */
19159 void
19160 ix86_split_copysign_var (rtx operands[])
19162 enum machine_mode mode, vmode;
19163 rtx dest, scratch, op0, op1, mask, nmask, x;
19165 dest = operands[0];
19166 scratch = operands[1];
19167 op0 = operands[2];
19168 op1 = operands[3];
19169 nmask = operands[4];
19170 mask = operands[5];
19172 mode = GET_MODE (dest);
19173 vmode = GET_MODE (mask);
19175 if (rtx_equal_p (op0, op1))
19177 /* Shouldn't happen often (it's useless, obviously), but when it does
19178 we'd generate incorrect code if we continue below. */
19179 emit_move_insn (dest, op0);
19180 return;
19183 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19185 gcc_assert (REGNO (op1) == REGNO (scratch));
19187 x = gen_rtx_AND (vmode, scratch, mask);
19188 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19190 dest = mask;
19191 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19192 x = gen_rtx_NOT (vmode, dest);
19193 x = gen_rtx_AND (vmode, x, op0);
19194 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19196 else
19198 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19200 x = gen_rtx_AND (vmode, scratch, mask);
19202 else /* alternative 2,4 */
19204 gcc_assert (REGNO (mask) == REGNO (scratch));
19205 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19206 x = gen_rtx_AND (vmode, scratch, op1);
19208 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19210 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19212 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19213 x = gen_rtx_AND (vmode, dest, nmask);
19215 else /* alternative 3,4 */
19217 gcc_assert (REGNO (nmask) == REGNO (dest));
19218 dest = nmask;
19219 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19220 x = gen_rtx_AND (vmode, dest, op0);
19222 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19225 x = gen_rtx_IOR (vmode, dest, scratch);
19226 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
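/* Illustrative sketch (not part of this file): the AND/AND/IOR sequences in
   the two splitters above implement the usual bit-level copysign --
   magnitude bits from one operand, sign bit from the other.  Hypothetical
   helper; assumes IEEE-754 doubles.  */

static double
copysign_sketch (double mag, double sgn)
{
  union { double d; unsigned long long u; } a, b;
  unsigned long long signbit = 0x8000000000000000ULL;

  a.d = mag;
  b.d = sgn;
  a.u = (a.u & ~signbit) | (b.u & signbit); /* nmask keeps magnitude, mask picks sign */
  return a.d;
}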
19229 /* Return TRUE or FALSE depending on whether the first SET in INSN
19230 has source and destination with matching CC modes, and whether the
19231 CC mode is at least as constrained as REQ_MODE. */
19233 bool
19234 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19236 rtx set;
19237 enum machine_mode set_mode;
19239 set = PATTERN (insn);
19240 if (GET_CODE (set) == PARALLEL)
19241 set = XVECEXP (set, 0, 0);
19242 gcc_assert (GET_CODE (set) == SET);
19243 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19245 set_mode = GET_MODE (SET_DEST (set));
19246 switch (set_mode)
19248 case CCNOmode:
19249 if (req_mode != CCNOmode
19250 && (req_mode != CCmode
19251 || XEXP (SET_SRC (set), 1) != const0_rtx))
19252 return false;
19253 break;
19254 case CCmode:
19255 if (req_mode == CCGCmode)
19256 return false;
19257 /* FALLTHRU */
19258 case CCGCmode:
19259 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19260 return false;
19261 /* FALLTHRU */
19262 case CCGOCmode:
19263 if (req_mode == CCZmode)
19264 return false;
19265 /* FALLTHRU */
19266 case CCZmode:
19267 break;
19269 case CCAmode:
19270 case CCCmode:
19271 case CCOmode:
19272 case CCSmode:
19273 if (set_mode != req_mode)
19274 return false;
19275 break;
19277 default:
19278 gcc_unreachable ();
19281 return GET_MODE (SET_SRC (set)) == set_mode;
19284 /* Generate insn patterns to do an integer compare of OPERANDS. */
19286 static rtx
19287 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19289 enum machine_mode cmpmode;
19290 rtx tmp, flags;
19292 cmpmode = SELECT_CC_MODE (code, op0, op1);
19293 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19295 /* This is very simple, but making the interface the same as in the
19296 FP case makes the rest of the code easier. */
19297 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19298 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19300 /* Return the test that should be put into the flags user, i.e.
19301 the bcc, scc, or cmov instruction. */
19302 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19305 /* Figure out whether to use ordered or unordered fp comparisons.
19306 Return the appropriate mode to use. */
19308 enum machine_mode
19309 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19311 /* ??? In order to make all comparisons reversible, we do all comparisons
19312 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19313 between trapping and nontrapping forms of comparisons, we can make inequality
19314 comparisons trapping again, since it results in better code when using
19315 FCOM based compares. */
19316 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19319 enum machine_mode
19320 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19322 enum machine_mode mode = GET_MODE (op0);
19324 if (SCALAR_FLOAT_MODE_P (mode))
19326 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19327 return ix86_fp_compare_mode (code);
19330 switch (code)
19332 /* Only zero flag is needed. */
19333 case EQ: /* ZF=0 */
19334 case NE: /* ZF!=0 */
19335 return CCZmode;
19336 /* Codes needing carry flag. */
19337 case GEU: /* CF=0 */
19338 case LTU: /* CF=1 */
19339 /* Detect overflow checks. They need just the carry flag. */
19340 if (GET_CODE (op0) == PLUS
19341 && rtx_equal_p (op1, XEXP (op0, 0)))
19342 return CCCmode;
19343 else
19344 return CCmode;
19345 case GTU: /* CF=0 & ZF=0 */
19346 case LEU: /* CF=1 | ZF=1 */
19347 return CCmode;
19348 /* Codes possibly doable only with sign flag when
19349 comparing against zero. */
19350 case GE: /* SF=OF or SF=0 */
19351 case LT: /* SF<>OF or SF=1 */
19352 if (op1 == const0_rtx)
19353 return CCGOCmode;
19354 else
19355 /* For other cases the carry flag is not required. */
19356 return CCGCmode;
19357 /* Codes doable only with the sign flag when comparing
19358 against zero, but for which there is no jump instruction,
19359 so we need to use relational tests against overflow,
19360 which thus needs to be zero. */
19361 case GT: /* ZF=0 & SF=OF */
19362 case LE: /* ZF=1 | SF<>OF */
19363 if (op1 == const0_rtx)
19364 return CCNOmode;
19365 else
19366 return CCGCmode;
19367 /* The strcmp pattern does (use flags), and combine may ask us for the
19368 proper mode. */
19369 case USE:
19370 return CCmode;
19371 default:
19372 gcc_unreachable ();
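/* Illustrative sketch (not part of this file): the CCCmode shape recognized
   above -- LTU/GEU of a PLUS against one of its own operands -- is exactly
   the unsigned "did the addition carry?" idiom, so only the carry flag is
   needed.  The helper name is hypothetical.  */

static int
add_carries_sketch (unsigned int a, unsigned int b)
{
  return a + b < a;  /* true iff the 32-bit addition wrapped, i.e. CF was set */
}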
19376 /* Return the fixed registers used for condition codes. */
19378 static bool
19379 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19381 *p1 = FLAGS_REG;
19382 *p2 = FPSR_REG;
19383 return true;
19386 /* If two condition code modes are compatible, return a condition code
19387 mode which is compatible with both. Otherwise, return
19388 VOIDmode. */
19390 static enum machine_mode
19391 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19393 if (m1 == m2)
19394 return m1;
19396 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19397 return VOIDmode;
19399 if ((m1 == CCGCmode && m2 == CCGOCmode)
19400 || (m1 == CCGOCmode && m2 == CCGCmode))
19401 return CCGCmode;
19403 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19404 return m2;
19405 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19406 return m1;
19408 switch (m1)
19410 default:
19411 gcc_unreachable ();
19413 case CCmode:
19414 case CCGCmode:
19415 case CCGOCmode:
19416 case CCNOmode:
19417 case CCAmode:
19418 case CCCmode:
19419 case CCOmode:
19420 case CCSmode:
19421 case CCZmode:
19422 switch (m2)
19424 default:
19425 return VOIDmode;
19427 case CCmode:
19428 case CCGCmode:
19429 case CCGOCmode:
19430 case CCNOmode:
19431 case CCAmode:
19432 case CCCmode:
19433 case CCOmode:
19434 case CCSmode:
19435 case CCZmode:
19436 return CCmode;
19439 case CCFPmode:
19440 case CCFPUmode:
19441 /* These are only compatible with themselves, which we already
19442 checked above. */
19443 return VOIDmode;
19448 /* Return a comparison we can do that is equivalent to
19449 swap_condition (code), except possibly for orderedness.
19450 But never change orderedness if TARGET_IEEE_FP, returning
19451 UNKNOWN in that case if necessary. */
19453 static enum rtx_code
19454 ix86_fp_swap_condition (enum rtx_code code)
19456 switch (code)
19458 case GT: /* GTU - CF=0 & ZF=0 */
19459 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19460 case GE: /* GEU - CF=0 */
19461 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19462 case UNLT: /* LTU - CF=1 */
19463 return TARGET_IEEE_FP ? UNKNOWN : GT;
19464 case UNLE: /* LEU - CF=1 | ZF=1 */
19465 return TARGET_IEEE_FP ? UNKNOWN : GE;
19466 default:
19467 return swap_condition (code);
19471 /* Return cost of comparison CODE using the best strategy for performance.
19472 All following functions use the number of instructions as the cost metric.
19473 In the future this should be tweaked to compute bytes for optimize_size and
19474 take into account the performance of various instructions on various CPUs. */
19476 static int
19477 ix86_fp_comparison_cost (enum rtx_code code)
19479 int arith_cost;
19481 /* The cost of code using bit-twiddling on %ah. */
19482 switch (code)
19484 case UNLE:
19485 case UNLT:
19486 case LTGT:
19487 case GT:
19488 case GE:
19489 case UNORDERED:
19490 case ORDERED:
19491 case UNEQ:
19492 arith_cost = 4;
19493 break;
19494 case LT:
19495 case NE:
19496 case EQ:
19497 case UNGE:
19498 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19499 break;
19500 case LE:
19501 case UNGT:
19502 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19503 break;
19504 default:
19505 gcc_unreachable ();
19508 switch (ix86_fp_comparison_strategy (code))
19510 case IX86_FPCMP_COMI:
19511 return arith_cost > 4 ? 3 : 2;
19512 case IX86_FPCMP_SAHF:
19513 return arith_cost > 4 ? 4 : 3;
19514 default:
19515 return arith_cost;
19519 /* Return strategy to use for floating-point. We assume that fcomi is always
19520 preferable where available, since that is also true when looking at size
19521 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19523 enum ix86_fpcmp_strategy
19524 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19526 /* Do fcomi/sahf based test when profitable. */
19528 if (TARGET_CMOVE)
19529 return IX86_FPCMP_COMI;
19531 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19532 return IX86_FPCMP_SAHF;
19534 return IX86_FPCMP_ARITH;
19537 /* Swap, force into registers, or otherwise massage the two operands
19538 to a fp comparison. The operands are updated in place; the new
19539 comparison code is returned. */
19541 static enum rtx_code
19542 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19544 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19545 rtx op0 = *pop0, op1 = *pop1;
19546 enum machine_mode op_mode = GET_MODE (op0);
19547 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19549 /* All of the unordered compare instructions only work on registers.
19550 The same is true of the fcomi compare instructions. The XFmode
19551 compare instructions require registers except when comparing
19552 against zero or when converting operand 1 from fixed point to
19553 floating point. */
19555 if (!is_sse
19556 && (fpcmp_mode == CCFPUmode
19557 || (op_mode == XFmode
19558 && ! (standard_80387_constant_p (op0) == 1
19559 || standard_80387_constant_p (op1) == 1)
19560 && GET_CODE (op1) != FLOAT)
19561 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19563 op0 = force_reg (op_mode, op0);
19564 op1 = force_reg (op_mode, op1);
19566 else
19568 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19569 things around if they appear profitable, otherwise force op0
19570 into a register. */
19572 if (standard_80387_constant_p (op0) == 0
19573 || (MEM_P (op0)
19574 && ! (standard_80387_constant_p (op1) == 0
19575 || MEM_P (op1))))
19577 enum rtx_code new_code = ix86_fp_swap_condition (code);
19578 if (new_code != UNKNOWN)
19580 rtx tmp;
19581 tmp = op0, op0 = op1, op1 = tmp;
19582 code = new_code;
19586 if (!REG_P (op0))
19587 op0 = force_reg (op_mode, op0);
19589 if (CONSTANT_P (op1))
19591 int tmp = standard_80387_constant_p (op1);
19592 if (tmp == 0)
19593 op1 = validize_mem (force_const_mem (op_mode, op1));
19594 else if (tmp == 1)
19596 if (TARGET_CMOVE)
19597 op1 = force_reg (op_mode, op1);
19599 else
19600 op1 = force_reg (op_mode, op1);
19604 /* Try to rearrange the comparison to make it cheaper. */
19605 if (ix86_fp_comparison_cost (code)
19606 > ix86_fp_comparison_cost (swap_condition (code))
19607 && (REG_P (op1) || can_create_pseudo_p ()))
19609 rtx tmp;
19610 tmp = op0, op0 = op1, op1 = tmp;
19611 code = swap_condition (code);
19612 if (!REG_P (op0))
19613 op0 = force_reg (op_mode, op0);
19616 *pop0 = op0;
19617 *pop1 = op1;
19618 return code;
19621 /* Convert comparison codes we use to represent FP comparison to integer
19622 code that will result in proper branch. Return UNKNOWN if no such code
19623 is available. */
19625 enum rtx_code
19626 ix86_fp_compare_code_to_integer (enum rtx_code code)
19628 switch (code)
19630 case GT:
19631 return GTU;
19632 case GE:
19633 return GEU;
19634 case ORDERED:
19635 case UNORDERED:
19636 return code;
19637 break;
19638 case UNEQ:
19639 return EQ;
19640 break;
19641 case UNLT:
19642 return LTU;
19643 break;
19644 case UNLE:
19645 return LEU;
19646 break;
19647 case LTGT:
19648 return NE;
19649 break;
19650 default:
19651 return UNKNOWN;
19655 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19657 static rtx
19658 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19660 enum machine_mode fpcmp_mode, intcmp_mode;
19661 rtx tmp, tmp2;
19663 fpcmp_mode = ix86_fp_compare_mode (code);
19664 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19666 /* Do fcomi/sahf based test when profitable. */
19667 switch (ix86_fp_comparison_strategy (code))
19669 case IX86_FPCMP_COMI:
19670 intcmp_mode = fpcmp_mode;
19671 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19672 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19673 tmp);
19674 emit_insn (tmp);
19675 break;
19677 case IX86_FPCMP_SAHF:
19678 intcmp_mode = fpcmp_mode;
19679 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19680 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19681 tmp);
19683 if (!scratch)
19684 scratch = gen_reg_rtx (HImode);
19685 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19686 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19687 break;
19689 case IX86_FPCMP_ARITH:
19690 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19691 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19692 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19693 if (!scratch)
19694 scratch = gen_reg_rtx (HImode);
19695 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19697 /* In the unordered case, we have to check C2 for NaN's, which
19698 doesn't happen to work out to anything nice combination-wise.
19699 So do some bit twiddling on the value we've got in AH to come
19700 up with an appropriate set of condition codes. */
19702 intcmp_mode = CCNOmode;
19703 switch (code)
19705 case GT:
19706 case UNGT:
19707 if (code == GT || !TARGET_IEEE_FP)
19709 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19710 code = EQ;
19712 else
19714 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19715 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19716 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19717 intcmp_mode = CCmode;
19718 code = GEU;
19720 break;
19721 case LT:
19722 case UNLT:
19723 if (code == LT && TARGET_IEEE_FP)
19725 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19726 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19727 intcmp_mode = CCmode;
19728 code = EQ;
19730 else
19732 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19733 code = NE;
19735 break;
19736 case GE:
19737 case UNGE:
19738 if (code == GE || !TARGET_IEEE_FP)
19740 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19741 code = EQ;
19743 else
19745 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19746 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19747 code = NE;
19749 break;
19750 case LE:
19751 case UNLE:
19752 if (code == LE && TARGET_IEEE_FP)
19754 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19755 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19756 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19757 intcmp_mode = CCmode;
19758 code = LTU;
19760 else
19762 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19763 code = NE;
19765 break;
19766 case EQ:
19767 case UNEQ:
19768 if (code == EQ && TARGET_IEEE_FP)
19770 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19771 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19772 intcmp_mode = CCmode;
19773 code = EQ;
19775 else
19777 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19778 code = NE;
19780 break;
19781 case NE:
19782 case LTGT:
19783 if (code == NE && TARGET_IEEE_FP)
19785 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19786 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19787 GEN_INT (0x40)));
19788 code = NE;
19790 else
19792 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19793 code = EQ;
19795 break;
19797 case UNORDERED:
19798 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19799 code = NE;
19800 break;
19801 case ORDERED:
19802 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19803 code = EQ;
19804 break;
19806 default:
19807 gcc_unreachable ();
19809 break;
19811 default:
19812 gcc_unreachable();
19815 /* Return the test that should be put into the flags user, i.e.
19816 the bcc, scc, or cmov instruction. */
19817 return gen_rtx_fmt_ee (code, VOIDmode,
19818 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19819 const0_rtx);
19822 static rtx
19823 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19825 rtx ret;
19827 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19828 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19830 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19832 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19833 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19835 else
19836 ret = ix86_expand_int_compare (code, op0, op1);
19838 return ret;
19841 void
19842 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19844 enum machine_mode mode = GET_MODE (op0);
19845 rtx tmp;
19847 switch (mode)
19849 case SFmode:
19850 case DFmode:
19851 case XFmode:
19852 case QImode:
19853 case HImode:
19854 case SImode:
19855 simple:
19856 tmp = ix86_expand_compare (code, op0, op1);
19857 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19858 gen_rtx_LABEL_REF (VOIDmode, label),
19859 pc_rtx);
19860 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19861 return;
19863 case DImode:
19864 if (TARGET_64BIT)
19865 goto simple;
19866 case TImode:
19867 /* Expand DImode branch into multiple compare+branch. */
19869 rtx lo[2], hi[2], label2;
19870 enum rtx_code code1, code2, code3;
19871 enum machine_mode submode;
19873 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19875 tmp = op0, op0 = op1, op1 = tmp;
19876 code = swap_condition (code);
19879 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19880 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19882 submode = mode == DImode ? SImode : DImode;
19884 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19885 avoid two branches. This costs one extra insn, so disable when
19886 optimizing for size. */
19888 if ((code == EQ || code == NE)
19889 && (!optimize_insn_for_size_p ()
19890 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19892 rtx xor0, xor1;
19894 xor1 = hi[0];
19895 if (hi[1] != const0_rtx)
19896 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19897 NULL_RTX, 0, OPTAB_WIDEN);
19899 xor0 = lo[0];
19900 if (lo[1] != const0_rtx)
19901 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19902 NULL_RTX, 0, OPTAB_WIDEN);
19904 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19905 NULL_RTX, 0, OPTAB_WIDEN);
19907 ix86_expand_branch (code, tmp, const0_rtx, label);
19908 return;
19911 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19912 op1 is a constant and the low word is zero, then we can just
19913 examine the high word. Similarly for low word -1 and
19914 less-or-equal-than or greater-than. */
19916 if (CONST_INT_P (hi[1]))
19917 switch (code)
19919 case LT: case LTU: case GE: case GEU:
19920 if (lo[1] == const0_rtx)
19922 ix86_expand_branch (code, hi[0], hi[1], label);
19923 return;
19925 break;
19926 case LE: case LEU: case GT: case GTU:
19927 if (lo[1] == constm1_rtx)
19929 ix86_expand_branch (code, hi[0], hi[1], label);
19930 return;
19932 break;
19933 default:
19934 break;
19937 /* Otherwise, we need two or three jumps. */
19939 label2 = gen_label_rtx ();
19941 code1 = code;
19942 code2 = swap_condition (code);
19943 code3 = unsigned_condition (code);
19945 switch (code)
19947 case LT: case GT: case LTU: case GTU:
19948 break;
19950 case LE: code1 = LT; code2 = GT; break;
19951 case GE: code1 = GT; code2 = LT; break;
19952 case LEU: code1 = LTU; code2 = GTU; break;
19953 case GEU: code1 = GTU; code2 = LTU; break;
19955 case EQ: code1 = UNKNOWN; code2 = NE; break;
19956 case NE: code2 = UNKNOWN; break;
19958 default:
19959 gcc_unreachable ();
19963 * a < b =>
19964 * if (hi(a) < hi(b)) goto true;
19965 * if (hi(a) > hi(b)) goto false;
19966 * if (lo(a) < lo(b)) goto true;
19967 * false:
19970 if (code1 != UNKNOWN)
19971 ix86_expand_branch (code1, hi[0], hi[1], label);
19972 if (code2 != UNKNOWN)
19973 ix86_expand_branch (code2, hi[0], hi[1], label2);
19975 ix86_expand_branch (code3, lo[0], lo[1], label);
19977 if (code2 != UNKNOWN)
19978 emit_label (label2);
19979 return;
19982 default:
19983 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19984 goto simple;
19988 /* Split branch based on floating point condition. */
19989 void
19990 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19991 rtx target1, rtx target2, rtx tmp)
19993 rtx condition;
19994 rtx i;
19996 if (target2 != pc_rtx)
19998 rtx tmp = target2;
19999 code = reverse_condition_maybe_unordered (code);
20000 target2 = target1;
20001 target1 = tmp;
20004 condition = ix86_expand_fp_compare (code, op1, op2,
20005 tmp);
20007 i = emit_jump_insn (gen_rtx_SET
20008 (VOIDmode, pc_rtx,
20009 gen_rtx_IF_THEN_ELSE (VOIDmode,
20010 condition, target1, target2)));
20011 if (split_branch_probability >= 0)
20012 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20015 void
20016 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20018 rtx ret;
20020 gcc_assert (GET_MODE (dest) == QImode);
20022 ret = ix86_expand_compare (code, op0, op1);
20023 PUT_MODE (ret, QImode);
20024 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20027 /* Expand comparison setting or clearing carry flag. Return true when
20028 successful and set pop for the operation. */
20029 static bool
20030 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20032 enum machine_mode mode =
20033 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20035 /* Do not handle double-mode compares that go through special path. */
20036 if (mode == (TARGET_64BIT ? TImode : DImode))
20037 return false;
20039 if (SCALAR_FLOAT_MODE_P (mode))
20041 rtx compare_op, compare_seq;
20043 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20045 /* Shortcut: the following common codes never translate
20046 into carry flag compares. */
20047 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20048 || code == ORDERED || code == UNORDERED)
20049 return false;
20051 /* These comparisons require zero flag; swap operands so they won't. */
20052 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20053 && !TARGET_IEEE_FP)
20055 rtx tmp = op0;
20056 op0 = op1;
20057 op1 = tmp;
20058 code = swap_condition (code);
20061 /* Try to expand the comparison and verify that we end up with a
20062 carry flag based comparison. This fails to be true only when
20063 we decide to expand the comparison using arithmetic, which is
20064 not a common scenario. */
20065 start_sequence ();
20066 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20067 compare_seq = get_insns ();
20068 end_sequence ();
20070 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20071 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20072 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20073 else
20074 code = GET_CODE (compare_op);
20076 if (code != LTU && code != GEU)
20077 return false;
20079 emit_insn (compare_seq);
20080 *pop = compare_op;
20081 return true;
20084 if (!INTEGRAL_MODE_P (mode))
20085 return false;
20087 switch (code)
20089 case LTU:
20090 case GEU:
20091 break;
20093 /* Convert a==0 into (unsigned)a<1. */
20094 case EQ:
20095 case NE:
20096 if (op1 != const0_rtx)
20097 return false;
20098 op1 = const1_rtx;
20099 code = (code == EQ ? LTU : GEU);
20100 break;
20102 /* Convert a>b into b<a or a>=b+1. */
20103 case GTU:
20104 case LEU:
20105 if (CONST_INT_P (op1))
20107 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20108 /* Bail out on overflow. We still can swap operands but that
20109 would force loading of the constant into register. */
20110 if (op1 == const0_rtx
20111 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20112 return false;
20113 code = (code == GTU ? GEU : LTU);
20115 else
20117 rtx tmp = op1;
20118 op1 = op0;
20119 op0 = tmp;
20120 code = (code == GTU ? LTU : GEU);
20122 break;
20124 /* Convert a>=0 into (unsigned)a<0x80000000. */
20125 case LT:
20126 case GE:
20127 if (mode == DImode || op1 != const0_rtx)
20128 return false;
20129 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20130 code = (code == LT ? GEU : LTU);
20131 break;
20132 case LE:
20133 case GT:
20134 if (mode == DImode || op1 != constm1_rtx)
20135 return false;
20136 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20137 code = (code == LE ? GEU : LTU);
20138 break;
20140 default:
20141 return false;
20143 /* Swapping operands may cause constant to appear as first operand. */
20144 if (!nonimmediate_operand (op0, VOIDmode))
20146 if (!can_create_pseudo_p ())
20147 return false;
20148 op0 = force_reg (mode, op0);
20150 *pop = ix86_expand_compare (code, op0, op1);
20151 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20152 return true;
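/* Illustrative sketch (not part of this file): the rewrites performed above
   turn common comparisons into forms that set only the carry flag.  The
   checks below restate them as C identities; the helper name is
   hypothetical and the signed view relies on GCC's modulo conversion.  */

static int
carry_rewrites_hold_sketch (unsigned int a, unsigned int b)
{
  int sa = (int) a;  /* same bits viewed as signed */
  int ok = 1;

  ok &= (a == 0) == (a < 1u);            /* a == 0  ->  (unsigned) a < 1 */
  if (b != ~0u)
    ok &= (a > b) == (a >= b + 1u);      /* a > b   ->  a >= b + 1 (no overflow) */
  ok &= (sa >= 0) == (a < 0x80000000u);  /* a >= 0  ->  (unsigned) a < 2^31 */
  ok &= (sa < 0) == (a >= 0x80000000u);  /* a < 0   ->  (unsigned) a >= 2^31 */
  return ok;
}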
20155 bool
20156 ix86_expand_int_movcc (rtx operands[])
20158 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20159 rtx compare_seq, compare_op;
20160 enum machine_mode mode = GET_MODE (operands[0]);
20161 bool sign_bit_compare_p = false;
20162 rtx op0 = XEXP (operands[1], 0);
20163 rtx op1 = XEXP (operands[1], 1);
20165 if (GET_MODE (op0) == TImode
20166 || (GET_MODE (op0) == DImode
20167 && !TARGET_64BIT))
20168 return false;
20170 start_sequence ();
20171 compare_op = ix86_expand_compare (code, op0, op1);
20172 compare_seq = get_insns ();
20173 end_sequence ();
20175 compare_code = GET_CODE (compare_op);
20177 if ((op1 == const0_rtx && (code == GE || code == LT))
20178 || (op1 == constm1_rtx && (code == GT || code == LE)))
20179 sign_bit_compare_p = true;
20181 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20182 HImode insns, we'd be swallowed in word prefix ops. */
20184 if ((mode != HImode || TARGET_FAST_PREFIX)
20185 && (mode != (TARGET_64BIT ? TImode : DImode))
20186 && CONST_INT_P (operands[2])
20187 && CONST_INT_P (operands[3]))
20189 rtx out = operands[0];
20190 HOST_WIDE_INT ct = INTVAL (operands[2]);
20191 HOST_WIDE_INT cf = INTVAL (operands[3]);
20192 HOST_WIDE_INT diff;
20194 diff = ct - cf;
20195 /* Sign bit compares are better done using shifts than by using
20196 sbb. */
20197 if (sign_bit_compare_p
20198 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20200 /* Detect overlap between destination and compare sources. */
20201 rtx tmp = out;
20203 if (!sign_bit_compare_p)
20205 rtx flags;
20206 bool fpcmp = false;
20208 compare_code = GET_CODE (compare_op);
20210 flags = XEXP (compare_op, 0);
20212 if (GET_MODE (flags) == CCFPmode
20213 || GET_MODE (flags) == CCFPUmode)
20215 fpcmp = true;
20216 compare_code
20217 = ix86_fp_compare_code_to_integer (compare_code);
20220 /* To simplify rest of code, restrict to the GEU case. */
20221 if (compare_code == LTU)
20223 HOST_WIDE_INT tmp = ct;
20224 ct = cf;
20225 cf = tmp;
20226 compare_code = reverse_condition (compare_code);
20227 code = reverse_condition (code);
20229 else
20231 if (fpcmp)
20232 PUT_CODE (compare_op,
20233 reverse_condition_maybe_unordered
20234 (GET_CODE (compare_op)));
20235 else
20236 PUT_CODE (compare_op,
20237 reverse_condition (GET_CODE (compare_op)));
20239 diff = ct - cf;
20241 if (reg_overlap_mentioned_p (out, op0)
20242 || reg_overlap_mentioned_p (out, op1))
20243 tmp = gen_reg_rtx (mode);
20245 if (mode == DImode)
20246 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20247 else
20248 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20249 flags, compare_op));
20251 else
20253 if (code == GT || code == GE)
20254 code = reverse_condition (code);
20255 else
20257 HOST_WIDE_INT tmp = ct;
20258 ct = cf;
20259 cf = tmp;
20260 diff = ct - cf;
20262 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20265 if (diff == 1)
20268 * cmpl op0,op1
20269 * sbbl dest,dest
20270 * [addl dest, ct]
20272 * Size 5 - 8.
20274 if (ct)
20275 tmp = expand_simple_binop (mode, PLUS,
20276 tmp, GEN_INT (ct),
20277 copy_rtx (tmp), 1, OPTAB_DIRECT);
20279 else if (cf == -1)
20282 * cmpl op0,op1
20283 * sbbl dest,dest
20284 * orl $ct, dest
20286 * Size 8.
20288 tmp = expand_simple_binop (mode, IOR,
20289 tmp, GEN_INT (ct),
20290 copy_rtx (tmp), 1, OPTAB_DIRECT);
20292 else if (diff == -1 && ct)
20295 * cmpl op0,op1
20296 * sbbl dest,dest
20297 * notl dest
20298 * [addl dest, cf]
20300 * Size 8 - 11.
20302 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20303 if (cf)
20304 tmp = expand_simple_binop (mode, PLUS,
20305 copy_rtx (tmp), GEN_INT (cf),
20306 copy_rtx (tmp), 1, OPTAB_DIRECT);
20308 else
20311 * cmpl op0,op1
20312 * sbbl dest,dest
20313 * [notl dest]
20314 * andl cf - ct, dest
20315 * [addl dest, ct]
20317 * Size 8 - 11.
20320 if (cf == 0)
20322 cf = ct;
20323 ct = 0;
20324 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20327 tmp = expand_simple_binop (mode, AND,
20328 copy_rtx (tmp),
20329 gen_int_mode (cf - ct, mode),
20330 copy_rtx (tmp), 1, OPTAB_DIRECT);
20331 if (ct)
20332 tmp = expand_simple_binop (mode, PLUS,
20333 copy_rtx (tmp), GEN_INT (ct),
20334 copy_rtx (tmp), 1, OPTAB_DIRECT);
20337 if (!rtx_equal_p (tmp, out))
20338 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20340 return true;
20343 if (diff < 0)
20345 enum machine_mode cmp_mode = GET_MODE (op0);
20347 HOST_WIDE_INT tmp;
20348 tmp = ct, ct = cf, cf = tmp;
20349 diff = -diff;
20351 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20353 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20355 /* We may be reversing an unordered compare to a normal compare, which
20356 is not valid in general (we may convert a non-trapping condition
20357 to a trapping one); however, on i386 we currently emit all
20358 comparisons unordered. */
20359 compare_code = reverse_condition_maybe_unordered (compare_code);
20360 code = reverse_condition_maybe_unordered (code);
20362 else
20364 compare_code = reverse_condition (compare_code);
20365 code = reverse_condition (code);
20369 compare_code = UNKNOWN;
20370 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20371 && CONST_INT_P (op1))
20373 if (op1 == const0_rtx
20374 && (code == LT || code == GE))
20375 compare_code = code;
20376 else if (op1 == constm1_rtx)
20378 if (code == LE)
20379 compare_code = LT;
20380 else if (code == GT)
20381 compare_code = GE;
20385 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20386 if (compare_code != UNKNOWN
20387 && GET_MODE (op0) == GET_MODE (out)
20388 && (cf == -1 || ct == -1))
20390 /* If lea code below could be used, only optimize
20391 if it results in a 2 insn sequence. */
20393 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20394 || diff == 3 || diff == 5 || diff == 9)
20395 || (compare_code == LT && ct == -1)
20396 || (compare_code == GE && cf == -1))
20399 * notl op1 (if necessary)
20400 * sarl $31, op1
20401 * orl cf, op1
20403 if (ct != -1)
20405 cf = ct;
20406 ct = -1;
20407 code = reverse_condition (code);
20410 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20412 out = expand_simple_binop (mode, IOR,
20413 out, GEN_INT (cf),
20414 out, 1, OPTAB_DIRECT);
20415 if (out != operands[0])
20416 emit_move_insn (operands[0], out);
20418 return true;
20423 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20424 || diff == 3 || diff == 5 || diff == 9)
20425 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20426 && (mode != DImode
20427 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20430 * xorl dest,dest
20431 * cmpl op1,op2
20432 * setcc dest
20433 * lea cf(dest*(ct-cf)),dest
20435 * Size 14.
20437 * This also catches the degenerate setcc-only case.
20440 rtx tmp;
20441 int nops;
20443 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20445 nops = 0;
20446 /* On x86_64 the lea instruction operates on Pmode, so we need
20447 to get the arithmetic done in the proper mode to match. */
20448 if (diff == 1)
20449 tmp = copy_rtx (out);
20450 else
20452 rtx out1;
20453 out1 = copy_rtx (out);
20454 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20455 nops++;
20456 if (diff & 1)
20458 tmp = gen_rtx_PLUS (mode, tmp, out1);
20459 nops++;
20462 if (cf != 0)
20464 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20465 nops++;
20467 if (!rtx_equal_p (tmp, out))
20469 if (nops == 1)
20470 out = force_operand (tmp, copy_rtx (out));
20471 else
20472 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20474 if (!rtx_equal_p (out, operands[0]))
20475 emit_move_insn (operands[0], copy_rtx (out));
20477 return true;
20481 * General case: Jumpful:
20482 * xorl dest,dest cmpl op1, op2
20483 * cmpl op1, op2 movl ct, dest
20484 * setcc dest jcc 1f
20485 * decl dest movl cf, dest
20486 * andl (cf-ct),dest 1:
20487 * addl ct,dest
20489 * Size 20. Size 14.
20491 * This is reasonably steep, but branch mispredict costs are
20492 * high on modern cpus, so consider failing only if optimizing
20493 * for space.
20496 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20497 && BRANCH_COST (optimize_insn_for_speed_p (),
20498 false) >= 2)
20500 if (cf == 0)
20502 enum machine_mode cmp_mode = GET_MODE (op0);
20504 cf = ct;
20505 ct = 0;
20507 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20509 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20511 /* We may be reversing an unordered compare to a normal compare,
20512 which is not valid in general (we may convert a non-trapping
20513 condition to a trapping one); however, on i386 we currently
20514 emit all comparisons unordered. */
20515 code = reverse_condition_maybe_unordered (code);
20517 else
20519 code = reverse_condition (code);
20520 if (compare_code != UNKNOWN)
20521 compare_code = reverse_condition (compare_code);
20525 if (compare_code != UNKNOWN)
20527 /* notl op1 (if needed)
20528 sarl $31, op1
20529 andl (cf-ct), op1
20530 addl ct, op1
20532 For x < 0 (resp. x <= -1) there will be no notl,
20533 so if possible swap the constants to get rid of the
20534 complement.
20535 True/false will be -1/0 while code below (store flag
20536 followed by decrement) is 0/-1, so the constants need
20537 to be exchanged once more. */
20539 if (compare_code == GE || !cf)
20541 code = reverse_condition (code);
20542 compare_code = LT;
20544 else
20546 HOST_WIDE_INT tmp = cf;
20547 cf = ct;
20548 ct = tmp;
20551 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20553 else
20555 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20557 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20558 constm1_rtx,
20559 copy_rtx (out), 1, OPTAB_DIRECT);
20562 out = expand_simple_binop (mode, AND, copy_rtx (out),
20563 gen_int_mode (cf - ct, mode),
20564 copy_rtx (out), 1, OPTAB_DIRECT);
20565 if (ct)
20566 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20567 copy_rtx (out), 1, OPTAB_DIRECT);
20568 if (!rtx_equal_p (out, operands[0]))
20569 emit_move_insn (operands[0], copy_rtx (out));
20571 return true;
20575 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20577 /* Try a few things more with specific constants and a variable. */
20579 optab op;
20580 rtx var, orig_out, out, tmp;
20582 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20583 return false;
20585 /* If one of the two operands is an interesting constant, load a
20586 constant with the above and mask it in with a logical operation. */
20588 if (CONST_INT_P (operands[2]))
20590 var = operands[3];
20591 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20592 operands[3] = constm1_rtx, op = and_optab;
20593 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20594 operands[3] = const0_rtx, op = ior_optab;
20595 else
20596 return false;
20598 else if (CONST_INT_P (operands[3]))
20600 var = operands[2];
20601 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20602 operands[2] = constm1_rtx, op = and_optab;
20603 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20604 operands[2] = const0_rtx, op = ior_optab;
20605 else
20606 return false;
20608 else
20609 return false;
20611 orig_out = operands[0];
20612 tmp = gen_reg_rtx (mode);
20613 operands[0] = tmp;
20615 /* Recurse to get the constant loaded. */
20616 if (ix86_expand_int_movcc (operands) == 0)
20617 return false;
20619 /* Mask in the interesting variable. */
20620 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20621 OPTAB_WIDEN);
20622 if (!rtx_equal_p (out, orig_out))
20623 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20625 return true;
20629 * For comparison with above,
20631 * movl cf,dest
20632 * movl ct,tmp
20633 * cmpl op1,op2
20634 * cmovcc tmp,dest
20636 * Size 15.
20639 if (! nonimmediate_operand (operands[2], mode))
20640 operands[2] = force_reg (mode, operands[2]);
20641 if (! nonimmediate_operand (operands[3], mode))
20642 operands[3] = force_reg (mode, operands[3]);
20644 if (! register_operand (operands[2], VOIDmode)
20645 && (mode == QImode
20646 || ! register_operand (operands[3], VOIDmode)))
20647 operands[2] = force_reg (mode, operands[2]);
20649 if (mode == QImode
20650 && ! register_operand (operands[3], VOIDmode))
20651 operands[3] = force_reg (mode, operands[3]);
20653 emit_insn (compare_seq);
20654 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20655 gen_rtx_IF_THEN_ELSE (mode,
20656 compare_op, operands[2],
20657 operands[3])));
20658 return true;
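/* Illustrative sketch (not part of this file): the sbb-based constant paths
   above all boil down to building an all-ones/all-zeros mask from the
   comparison and folding the two constants into it, avoiding a branch.  The
   helper name is hypothetical.  */

static int
movcc_const_sketch (unsigned int a, unsigned int b, int ct, int cf)
{
  unsigned int mask = - (unsigned int) (a < b);  /* ~0 if a < b, else 0 (the sbb idiom) */
  /* a < b ? ct : cf, computed without a branch.  */
  return (int) ((mask & (unsigned int) (ct - cf)) + (unsigned int) cf);
}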
20661 /* Swap, force into registers, or otherwise massage the two operands
20662 to an sse comparison with a mask result. Thus we differ a bit from
20663 ix86_prepare_fp_compare_args which expects to produce a flags result.
20665 The DEST operand exists to help determine whether to commute commutative
20666 operators. The POP0/POP1 operands are updated in place. The new
20667 comparison code is returned, or UNKNOWN if not implementable. */
20669 static enum rtx_code
20670 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20671 rtx *pop0, rtx *pop1)
20673 rtx tmp;
20675 switch (code)
20677 case LTGT:
20678 case UNEQ:
20679 /* AVX supports all the needed comparisons. */
20680 if (TARGET_AVX)
20681 break;
20682 /* We have no LTGT as an operator. We could implement it with
20683 NE & ORDERED, but this requires an extra temporary. It's
20684 not clear that it's worth it. */
20685 return UNKNOWN;
20687 case LT:
20688 case LE:
20689 case UNGT:
20690 case UNGE:
20691 /* These are supported directly. */
20692 break;
20694 case EQ:
20695 case NE:
20696 case UNORDERED:
20697 case ORDERED:
20698 /* AVX has 3 operand comparisons, no need to swap anything. */
20699 if (TARGET_AVX)
20700 break;
20701 /* For commutative operators, try to canonicalize the destination
20702 operand to be first in the comparison - this helps reload to
20703 avoid extra moves. */
20704 if (!dest || !rtx_equal_p (dest, *pop1))
20705 break;
20706 /* FALLTHRU */
20708 case GE:
20709 case GT:
20710 case UNLE:
20711 case UNLT:
20712 /* These are not supported directly before AVX, and furthermore
20713 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20714 comparison operands to transform into something that is
20715 supported. */
20716 tmp = *pop0;
20717 *pop0 = *pop1;
20718 *pop1 = tmp;
20719 code = swap_condition (code);
20720 break;
20722 default:
20723 gcc_unreachable ();
20726 return code;
20729 /* Detect conditional moves that exactly match min/max operational
20730 semantics. Note that this is IEEE safe, as long as we don't
20731 interchange the operands.
20733 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20734 and TRUE if the operation is successful and instructions are emitted. */
20736 static bool
20737 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20738 rtx cmp_op1, rtx if_true, rtx if_false)
20740 enum machine_mode mode;
20741 bool is_min;
20742 rtx tmp;
20744 if (code == LT)
20746 else if (code == UNGE)
20748 tmp = if_true;
20749 if_true = if_false;
20750 if_false = tmp;
20752 else
20753 return false;
20755 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20756 is_min = true;
20757 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20758 is_min = false;
20759 else
20760 return false;
20762 mode = GET_MODE (dest);
20764 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20765 but MODE may be a vector mode and thus not appropriate. */
20766 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20768 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20769 rtvec v;
20771 if_true = force_reg (mode, if_true);
20772 v = gen_rtvec (2, if_true, if_false);
20773 tmp = gen_rtx_UNSPEC (mode, v, u);
20775 else
20777 code = is_min ? SMIN : SMAX;
20778 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20781 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20782 return true;
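/* Illustrative sketch (not part of this file): the LT/UNGE shapes matched
   above correspond to SSE min/max semantics, which return the second
   operand whenever the comparison is false or unordered; this is why the
   operands must not be interchanged.  The helper name is hypothetical.  */

static double
sse_min_sketch (double a, double b)
{
  return a < b ? a : b;  /* second operand wins on NaN and on equal values */
}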
20785 /* Expand an sse vector comparison. Return the register with the result. */
20787 static rtx
20788 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20789 rtx op_true, rtx op_false)
20791 enum machine_mode mode = GET_MODE (dest);
20792 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20794 /* In the general case the result of a comparison can differ from the operands' type. */
20795 enum machine_mode cmp_mode;
20797 /* In AVX512F the result of comparison is an integer mask. */
20798 bool maskcmp = false;
20799 rtx x;
20801 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20803 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20804 gcc_assert (cmp_mode != BLKmode);
20806 maskcmp = true;
20808 else
20809 cmp_mode = cmp_ops_mode;
20812 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20813 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20814 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20816 if (optimize
20817 || reg_overlap_mentioned_p (dest, op_true)
20818 || reg_overlap_mentioned_p (dest, op_false))
20819 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20821 /* For AVX512F, the integer-mode compare patterns are defined as unspecs. */
20822 if (maskcmp && (code == GT || code == EQ))
20824 rtx (*gen)(rtx, rtx, rtx);
20826 switch (cmp_ops_mode)
20828 case V16SImode:
20829 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20830 break;
20831 case V8DImode:
20832 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20833 break;
20834 default:
20835 gen = NULL;
20838 if (gen)
20840 emit_insn (gen (dest, cmp_op0, cmp_op1));
20841 return dest;
20844 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20846 if (cmp_mode != mode && !maskcmp)
20848 x = force_reg (cmp_ops_mode, x);
20849 convert_move (dest, x, false);
20851 else
20852 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20854 return dest;
20857 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20858 operations. This is used for both scalar and vector conditional moves. */
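/* In the general fallback below the result is built as
     dest = (cmp & op_true) | (~cmp & op_false)
   from a full comparison mask CMP; the special cases that follow
   merely drop whichever logical operations become trivial when
   OP_TRUE is all ones or OP_FALSE is zero, or use a blend/blendm
   instruction when one is available.  */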
20860 static void
20861 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20863 enum machine_mode mode = GET_MODE (dest);
20864 enum machine_mode cmpmode = GET_MODE (cmp);
20866 /* In AVX512F the result of comparison is an integer mask. */
20867 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20869 rtx t2, t3, x;
20871 if (vector_all_ones_operand (op_true, mode)
20872 && rtx_equal_p (op_false, CONST0_RTX (mode))
20873 && !maskcmp)
20875 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20877 else if (op_false == CONST0_RTX (mode)
20878 && !maskcmp)
20880 op_true = force_reg (mode, op_true);
20881 x = gen_rtx_AND (mode, cmp, op_true);
20882 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20884 else if (op_true == CONST0_RTX (mode)
20885 && !maskcmp)
20887 op_false = force_reg (mode, op_false);
20888 x = gen_rtx_NOT (mode, cmp);
20889 x = gen_rtx_AND (mode, x, op_false);
20890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20892 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20893 && !maskcmp)
20895 op_false = force_reg (mode, op_false);
20896 x = gen_rtx_IOR (mode, cmp, op_false);
20897 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20899 else if (TARGET_XOP
20900 && !maskcmp)
20902 op_true = force_reg (mode, op_true);
20904 if (!nonimmediate_operand (op_false, mode))
20905 op_false = force_reg (mode, op_false);
20907 emit_insn (gen_rtx_SET (mode, dest,
20908 gen_rtx_IF_THEN_ELSE (mode, cmp,
20909 op_true,
20910 op_false)));
20912 else
20914 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20915 rtx d = dest;
20917 if (!nonimmediate_operand (op_true, mode))
20918 op_true = force_reg (mode, op_true);
20920 op_false = force_reg (mode, op_false);
20922 switch (mode)
20924 case V4SFmode:
20925 if (TARGET_SSE4_1)
20926 gen = gen_sse4_1_blendvps;
20927 break;
20928 case V2DFmode:
20929 if (TARGET_SSE4_1)
20930 gen = gen_sse4_1_blendvpd;
20931 break;
20932 case V16QImode:
20933 case V8HImode:
20934 case V4SImode:
20935 case V2DImode:
20936 if (TARGET_SSE4_1)
20938 gen = gen_sse4_1_pblendvb;
20939 if (mode != V16QImode)
20940 d = gen_reg_rtx (V16QImode);
20941 op_false = gen_lowpart (V16QImode, op_false);
20942 op_true = gen_lowpart (V16QImode, op_true);
20943 cmp = gen_lowpart (V16QImode, cmp);
20945 break;
20946 case V8SFmode:
20947 if (TARGET_AVX)
20948 gen = gen_avx_blendvps256;
20949 break;
20950 case V4DFmode:
20951 if (TARGET_AVX)
20952 gen = gen_avx_blendvpd256;
20953 break;
20954 case V32QImode:
20955 case V16HImode:
20956 case V8SImode:
20957 case V4DImode:
20958 if (TARGET_AVX2)
20960 gen = gen_avx2_pblendvb;
20961 if (mode != V32QImode)
20962 d = gen_reg_rtx (V32QImode);
20963 op_false = gen_lowpart (V32QImode, op_false);
20964 op_true = gen_lowpart (V32QImode, op_true);
20965 cmp = gen_lowpart (V32QImode, cmp);
20967 break;
20969 case V16SImode:
20970 gen = gen_avx512f_blendmv16si;
20971 break;
20972 case V8DImode:
20973 gen = gen_avx512f_blendmv8di;
20974 break;
20975 case V8DFmode:
20976 gen = gen_avx512f_blendmv8df;
20977 break;
20978 case V16SFmode:
20979 gen = gen_avx512f_blendmv16sf;
20980 break;
20982 default:
20983 break;
20986 if (gen != NULL)
20988 emit_insn (gen (d, op_false, op_true, cmp));
20989 if (d != dest)
20990 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20992 else
20994 op_true = force_reg (mode, op_true);
20996 t2 = gen_reg_rtx (mode);
20997 if (optimize)
20998 t3 = gen_reg_rtx (mode);
20999 else
21000 t3 = dest;
21002 x = gen_rtx_AND (mode, op_true, cmp);
21003 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21005 x = gen_rtx_NOT (mode, cmp);
21006 x = gen_rtx_AND (mode, x, op_false);
21007 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21009 x = gen_rtx_IOR (mode, t3, t2);
21010 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21015 /* Expand a floating-point conditional move. Return true if successful. */
21017 bool
21018 ix86_expand_fp_movcc (rtx operands[])
21020 enum machine_mode mode = GET_MODE (operands[0]);
21021 enum rtx_code code = GET_CODE (operands[1]);
21022 rtx tmp, compare_op;
21023 rtx op0 = XEXP (operands[1], 0);
21024 rtx op1 = XEXP (operands[1], 1);
21026 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21028 enum machine_mode cmode;
21030 /* Since we've no cmove for sse registers, don't force bad register
21031 allocation just to gain access to it. Deny movcc when the
21032 comparison mode doesn't match the move mode. */
21033 cmode = GET_MODE (op0);
21034 if (cmode == VOIDmode)
21035 cmode = GET_MODE (op1);
21036 if (cmode != mode)
21037 return false;
21039 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21040 if (code == UNKNOWN)
21041 return false;
21043 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21044 operands[2], operands[3]))
21045 return true;
21047 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21048 operands[2], operands[3]);
21049 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21050 return true;
21053 if (GET_MODE (op0) == TImode
21054 || (GET_MODE (op0) == DImode
21055 && !TARGET_64BIT))
21056 return false;
21058 /* The floating point conditional move instructions don't directly
21059 support conditions resulting from a signed integer comparison. */
21061 compare_op = ix86_expand_compare (code, op0, op1);
21062 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21064 tmp = gen_reg_rtx (QImode);
21065 ix86_expand_setcc (tmp, code, op0, op1);
21067 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21070 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21071 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21072 operands[2], operands[3])));
21074 return true;
21077 /* Expand a floating-point vector conditional move; a vcond operation
21078 rather than a movcc operation. */
21080 bool
21081 ix86_expand_fp_vcond (rtx operands[])
21083 enum rtx_code code = GET_CODE (operands[3]);
21084 rtx cmp;
21086 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21087 &operands[4], &operands[5]);
21088 if (code == UNKNOWN)
21090 rtx temp;
21091 switch (GET_CODE (operands[3]))
21093 case LTGT:
21094 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21095 operands[5], operands[0], operands[0]);
21096 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21097 operands[5], operands[1], operands[2]);
21098 code = AND;
21099 break;
21100 case UNEQ:
21101 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21102 operands[5], operands[0], operands[0]);
21103 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21104 operands[5], operands[1], operands[2]);
21105 code = IOR;
21106 break;
21107 default:
21108 gcc_unreachable ();
21110 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21111 OPTAB_DIRECT);
21112 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21113 return true;
21116 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21117 operands[5], operands[1], operands[2]))
21118 return true;
21120 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21121 operands[1], operands[2]);
21122 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21123 return true;
21126 /* Expand a signed/unsigned integral vector conditional move. */
21128 bool
21129 ix86_expand_int_vcond (rtx operands[])
21131 enum machine_mode data_mode = GET_MODE (operands[0]);
21132 enum machine_mode mode = GET_MODE (operands[4]);
21133 enum rtx_code code = GET_CODE (operands[3]);
21134 bool negate = false;
21135 rtx x, cop0, cop1;
21137 cop0 = operands[4];
21138 cop1 = operands[5];
21140 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21141 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
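/* For example, for V4SImode this becomes
     x < 0 ? -1 : 0  ->  psrad $31, x   (arithmetic shift)
     x < 0 ?  1 : 0  ->  psrld $31, x   (logical shift)
   with the shift count being the element bit size minus one for the
   other supported element widths.  */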
21142 if ((code == LT || code == GE)
21143 && data_mode == mode
21144 && cop1 == CONST0_RTX (mode)
21145 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21146 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21147 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21148 && (GET_MODE_SIZE (data_mode) == 16
21149 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21151 rtx negop = operands[2 - (code == LT)];
21152 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21153 if (negop == CONST1_RTX (data_mode))
21155 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21156 operands[0], 1, OPTAB_DIRECT);
21157 if (res != operands[0])
21158 emit_move_insn (operands[0], res);
21159 return true;
21161 else if (GET_MODE_INNER (data_mode) != DImode
21162 && vector_all_ones_operand (negop, data_mode))
21164 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21165 operands[0], 0, OPTAB_DIRECT);
21166 if (res != operands[0])
21167 emit_move_insn (operands[0], res);
21168 return true;
21172 if (!nonimmediate_operand (cop1, mode))
21173 cop1 = force_reg (mode, cop1);
21174 if (!general_operand (operands[1], data_mode))
21175 operands[1] = force_reg (data_mode, operands[1]);
21176 if (!general_operand (operands[2], data_mode))
21177 operands[2] = force_reg (data_mode, operands[2]);
21179 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21180 if (TARGET_XOP
21181 && (mode == V16QImode || mode == V8HImode
21182 || mode == V4SImode || mode == V2DImode))
21184 else
21186 /* Canonicalize the comparison to EQ, GT, GTU. */
21187 switch (code)
21189 case EQ:
21190 case GT:
21191 case GTU:
21192 break;
21194 case NE:
21195 case LE:
21196 case LEU:
21197 code = reverse_condition (code);
21198 negate = true;
21199 break;
21201 case GE:
21202 case GEU:
21203 code = reverse_condition (code);
21204 negate = true;
21205 /* FALLTHRU */
21207 case LT:
21208 case LTU:
21209 code = swap_condition (code);
21210 x = cop0, cop0 = cop1, cop1 = x;
21211 break;
21213 default:
21214 gcc_unreachable ();
21217 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21218 if (mode == V2DImode)
21220 switch (code)
21222 case EQ:
21223 /* SSE4.1 supports EQ. */
21224 if (!TARGET_SSE4_1)
21225 return false;
21226 break;
21228 case GT:
21229 case GTU:
21230 /* SSE4.2 supports GT/GTU. */
21231 if (!TARGET_SSE4_2)
21232 return false;
21233 break;
21235 default:
21236 gcc_unreachable ();
21240 /* Unsigned parallel compare is not supported by the hardware.
21241 Play some tricks to turn this into a signed comparison,
21242 or into a comparison against zero. */
21243 if (code == GTU)
21245 cop0 = force_reg (mode, cop0);
21247 switch (mode)
21249 case V16SImode:
21250 case V8DImode:
21251 case V8SImode:
21252 case V4DImode:
21253 case V4SImode:
21254 case V2DImode:
21256 rtx t1, t2, mask;
21257 rtx (*gen_sub3) (rtx, rtx, rtx);
21259 switch (mode)
21261 case V16SImode: gen_sub3 = gen_subv16si3; break;
21262 case V8DImode: gen_sub3 = gen_subv8di3; break;
21263 case V8SImode: gen_sub3 = gen_subv8si3; break;
21264 case V4DImode: gen_sub3 = gen_subv4di3; break;
21265 case V4SImode: gen_sub3 = gen_subv4si3; break;
21266 case V2DImode: gen_sub3 = gen_subv2di3; break;
21267 default:
21268 gcc_unreachable ();
21270 /* Subtract (-(INT MAX) - 1) from both operands to make
21271 them signed. */
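/* E.g. with 32-bit elements the mask is 0x80000000 in each lane, so
   the unsigned test  a >u b  becomes the signed test
     (a - 0x80000000) >s (b - 0x80000000)
   which the pcmpgt family of instructions can evaluate directly.  */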
21272 mask = ix86_build_signbit_mask (mode, true, false);
21273 t1 = gen_reg_rtx (mode);
21274 emit_insn (gen_sub3 (t1, cop0, mask));
21276 t2 = gen_reg_rtx (mode);
21277 emit_insn (gen_sub3 (t2, cop1, mask));
21279 cop0 = t1;
21280 cop1 = t2;
21281 code = GT;
21283 break;
21285 case V32QImode:
21286 case V16HImode:
21287 case V16QImode:
21288 case V8HImode:
21289 /* Perform a parallel unsigned saturating subtraction. */
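/* With unsigned saturation, a - b is zero exactly when a <= b, so
     a >u b   <==>   (a -us b) != 0.
   The EQ test against zero below therefore computes the inverse of
   GTU, which is why NEGATE is flipped as well.  */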
21290 x = gen_reg_rtx (mode);
21291 emit_insn (gen_rtx_SET (VOIDmode, x,
21292 gen_rtx_US_MINUS (mode, cop0, cop1)));
21294 cop0 = x;
21295 cop1 = CONST0_RTX (mode);
21296 code = EQ;
21297 negate = !negate;
21298 break;
21300 default:
21301 gcc_unreachable ();
21306 /* Allow the comparison to be done in one mode, but the movcc to
21307 happen in another mode. */
21308 if (data_mode == mode)
21310 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21311 operands[1+negate], operands[2-negate]);
21313 else
21315 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21316 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21317 operands[1+negate], operands[2-negate]);
21318 if (GET_MODE (x) == mode)
21319 x = gen_lowpart (data_mode, x);
21322 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21323 operands[2-negate]);
21324 return true;
21327 static bool
21328 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21330 enum machine_mode mode = GET_MODE (op0);
21331 switch (mode)
21333 case V16SImode:
21334 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21335 force_reg (V16SImode, mask),
21336 op1));
21337 return true;
21338 case V16SFmode:
21339 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21340 force_reg (V16SImode, mask),
21341 op1));
21342 return true;
21343 case V8DImode:
21344 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21345 force_reg (V8DImode, mask), op1));
21346 return true;
21347 case V8DFmode:
21348 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21349 force_reg (V8DImode, mask), op1));
21350 return true;
21351 default:
21352 return false;
21356 /* Expand a variable vector permutation. */
21358 void
21359 ix86_expand_vec_perm (rtx operands[])
21361 rtx target = operands[0];
21362 rtx op0 = operands[1];
21363 rtx op1 = operands[2];
21364 rtx mask = operands[3];
21365 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21366 enum machine_mode mode = GET_MODE (op0);
21367 enum machine_mode maskmode = GET_MODE (mask);
21368 int w, e, i;
21369 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21371 /* Number of elements in the vector. */
21372 w = GET_MODE_NUNITS (mode);
21373 e = GET_MODE_UNIT_SIZE (mode);
21374 gcc_assert (w <= 64);
21376 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21377 return;
21379 if (TARGET_AVX2)
21381 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21383 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21384 a constant shuffle operand. With a tiny bit of effort we can
21385 use VPERMD instead. A re-interpretation stall for V4DFmode is
21386 unfortunate but there's no avoiding it.
21387 Similarly for V16HImode we don't have instructions for variable
21388 shuffling, while for V32QImode we can, after preparing suitable
21389 masks, use vpshufb; vpshufb; vpermq; vpor. */
21391 if (mode == V16HImode)
21393 maskmode = mode = V32QImode;
21394 w = 32;
21395 e = 1;
21397 else
21399 maskmode = mode = V8SImode;
21400 w = 8;
21401 e = 4;
21403 t1 = gen_reg_rtx (maskmode);
21405 /* Replicate the low bits of the V4DImode mask into V8SImode:
21406 mask = { A B C D }
21407 t1 = { A A B B C C D D }. */
21408 for (i = 0; i < w / 2; ++i)
21409 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21410 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21411 vt = force_reg (maskmode, vt);
21412 mask = gen_lowpart (maskmode, mask);
21413 if (maskmode == V8SImode)
21414 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21415 else
21416 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21418 /* Multiply the shuffle indices by two. */
21419 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21420 OPTAB_DIRECT);
21422 /* Add one to the odd shuffle indices:
21423 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21424 for (i = 0; i < w / 2; ++i)
21426 vec[i * 2] = const0_rtx;
21427 vec[i * 2 + 1] = const1_rtx;
21429 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21430 vt = validize_mem (force_const_mem (maskmode, vt));
21431 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21432 OPTAB_DIRECT);
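/* As an illustration, an original V4DImode mask of { 3 1 0 2 } has
   now been widened to the V8SImode mask { 6 7 2 3 0 1 4 5 }: each
   64-bit element selection is expressed as the pair of 32-bit
   selections that VPERMD understands.  */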
21434 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21435 operands[3] = mask = t1;
21436 target = gen_reg_rtx (mode);
21437 op0 = gen_lowpart (mode, op0);
21438 op1 = gen_lowpart (mode, op1);
21441 switch (mode)
21443 case V8SImode:
21444 /* The VPERMD and VPERMPS instructions already properly ignore
21445 the high bits of the shuffle elements. No need for us to
21446 perform an AND ourselves. */
21447 if (one_operand_shuffle)
21449 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21450 if (target != operands[0])
21451 emit_move_insn (operands[0],
21452 gen_lowpart (GET_MODE (operands[0]), target));
21454 else
21456 t1 = gen_reg_rtx (V8SImode);
21457 t2 = gen_reg_rtx (V8SImode);
21458 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21459 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21460 goto merge_two;
21462 return;
21464 case V8SFmode:
21465 mask = gen_lowpart (V8SImode, mask);
21466 if (one_operand_shuffle)
21467 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21468 else
21470 t1 = gen_reg_rtx (V8SFmode);
21471 t2 = gen_reg_rtx (V8SFmode);
21472 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21473 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21474 goto merge_two;
21476 return;
21478 case V4SImode:
21479 /* By combining the two 128-bit input vectors into one 256-bit
21480 input vector, we can use VPERMD and VPERMPS for the full
21481 two-operand shuffle. */
21482 t1 = gen_reg_rtx (V8SImode);
21483 t2 = gen_reg_rtx (V8SImode);
21484 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21485 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21486 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21487 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21488 return;
21490 case V4SFmode:
21491 t1 = gen_reg_rtx (V8SFmode);
21492 t2 = gen_reg_rtx (V8SImode);
21493 mask = gen_lowpart (V4SImode, mask);
21494 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21495 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21496 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21497 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21498 return;
21500 case V32QImode:
21501 t1 = gen_reg_rtx (V32QImode);
21502 t2 = gen_reg_rtx (V32QImode);
21503 t3 = gen_reg_rtx (V32QImode);
21504 vt2 = GEN_INT (128);
21505 for (i = 0; i < 32; i++)
21506 vec[i] = vt2;
21507 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21508 vt = force_reg (V32QImode, vt);
21509 for (i = 0; i < 32; i++)
21510 vec[i] = i < 16 ? vt2 : const0_rtx;
21511 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21512 vt2 = force_reg (V32QImode, vt2);
21513 /* From mask create two adjusted masks, which contain the same
21514 bits as mask in the low 7 bits of each vector element.
21515 The first mask will have the most significant bit clear
21516 if it requests element from the same 128-bit lane
21517 and MSB set if it requests element from the other 128-bit lane.
21518 The second mask will have the opposite values of the MSB,
21519 and additionally will have its 128-bit lanes swapped.
21520 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21521 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21522 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21523 stands for other 12 bytes. */
21524 /* The bit that tells whether an element is from the same lane or the
21525 other lane is bit 4, so shift it up by 3 to the MSB position. */
21526 t5 = gen_reg_rtx (V4DImode);
21527 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21528 GEN_INT (3)));
21529 /* Clear MSB bits from the mask just in case it had them set. */
21530 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21531 /* After this t1 will have MSB set for elements from other lane. */
21532 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21533 /* Clear bits other than MSB. */
21534 emit_insn (gen_andv32qi3 (t1, t1, vt));
21535 /* Or in the lower bits from mask into t3. */
21536 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21537 /* And invert MSB bits in t1, so MSB is set for elements from the same
21538 lane. */
21539 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21540 /* Swap 128-bit lanes in t3. */
21541 t6 = gen_reg_rtx (V4DImode);
21542 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21543 const2_rtx, GEN_INT (3),
21544 const0_rtx, const1_rtx));
21545 /* And or in the lower bits from mask into t1. */
21546 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21547 if (one_operand_shuffle)
21549 /* Each of these shuffles will put 0s in places where
21550 element from the other 128-bit lane is needed, otherwise
21551 will shuffle in the requested value. */
21552 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21553 gen_lowpart (V32QImode, t6)));
21554 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21555 /* For t3 the 128-bit lanes are swapped again. */
21556 t7 = gen_reg_rtx (V4DImode);
21557 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21558 const2_rtx, GEN_INT (3),
21559 const0_rtx, const1_rtx));
21560 /* And oring both together leads to the result. */
21561 emit_insn (gen_iorv32qi3 (target, t1,
21562 gen_lowpart (V32QImode, t7)));
21563 if (target != operands[0])
21564 emit_move_insn (operands[0],
21565 gen_lowpart (GET_MODE (operands[0]), target));
21566 return;
21569 t4 = gen_reg_rtx (V32QImode);
21570 /* Similar to the one_operand_shuffle code above, just
21571 repeated twice for each operand. The merge_two:
21572 code will merge the two results together. */
21573 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21574 gen_lowpart (V32QImode, t6)));
21575 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21576 gen_lowpart (V32QImode, t6)));
21577 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21578 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21579 t7 = gen_reg_rtx (V4DImode);
21580 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21581 const2_rtx, GEN_INT (3),
21582 const0_rtx, const1_rtx));
21583 t8 = gen_reg_rtx (V4DImode);
21584 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21585 const2_rtx, GEN_INT (3),
21586 const0_rtx, const1_rtx));
21587 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21588 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21589 t1 = t4;
21590 t2 = t3;
21591 goto merge_two;
21593 default:
21594 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21595 break;
21599 if (TARGET_XOP)
21601 /* The XOP VPPERM insn supports three inputs. By ignoring the
21602 one_operand_shuffle special case, we avoid creating another
21603 set of constant vectors in memory. */
21604 one_operand_shuffle = false;
21606 /* mask = mask & {2*w-1, ...} */
21607 vt = GEN_INT (2*w - 1);
21609 else
21611 /* mask = mask & {w-1, ...} */
21612 vt = GEN_INT (w - 1);
21615 for (i = 0; i < w; i++)
21616 vec[i] = vt;
21617 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21618 mask = expand_simple_binop (maskmode, AND, mask, vt,
21619 NULL_RTX, 0, OPTAB_DIRECT);
21621 /* For non-QImode operations, convert the word permutation control
21622 into a byte permutation control. */
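/* E.g. for V4SImode (e == 4) a word index of 2 is scaled to 8 by
   the shift below, replicated into its four byte positions as
   { 8 8 8 8 }, and then offset by { 0 1 2 3 }, giving the byte
   level control { 8 9 10 11 } that pshufb expects.  */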
21623 if (mode != V16QImode)
21625 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21626 GEN_INT (exact_log2 (e)),
21627 NULL_RTX, 0, OPTAB_DIRECT);
21629 /* Convert mask to vector of chars. */
21630 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21632 /* Replicate each of the input bytes into byte positions:
21633 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21634 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21635 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21636 for (i = 0; i < 16; ++i)
21637 vec[i] = GEN_INT (i/e * e);
21638 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21639 vt = validize_mem (force_const_mem (V16QImode, vt));
21640 if (TARGET_XOP)
21641 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21642 else
21643 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21645 /* Convert it into the byte positions by doing
21646 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21647 for (i = 0; i < 16; ++i)
21648 vec[i] = GEN_INT (i % e);
21649 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21650 vt = validize_mem (force_const_mem (V16QImode, vt));
21651 emit_insn (gen_addv16qi3 (mask, mask, vt));
21654 /* The actual shuffle operations all operate on V16QImode. */
21655 op0 = gen_lowpart (V16QImode, op0);
21656 op1 = gen_lowpart (V16QImode, op1);
21658 if (TARGET_XOP)
21660 if (GET_MODE (target) != V16QImode)
21661 target = gen_reg_rtx (V16QImode);
21662 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21663 if (target != operands[0])
21664 emit_move_insn (operands[0],
21665 gen_lowpart (GET_MODE (operands[0]), target));
21667 else if (one_operand_shuffle)
21669 if (GET_MODE (target) != V16QImode)
21670 target = gen_reg_rtx (V16QImode);
21671 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21672 if (target != operands[0])
21673 emit_move_insn (operands[0],
21674 gen_lowpart (GET_MODE (operands[0]), target));
21676 else
21678 rtx xops[6];
21679 bool ok;
21681 /* Shuffle the two input vectors independently. */
21682 t1 = gen_reg_rtx (V16QImode);
21683 t2 = gen_reg_rtx (V16QImode);
21684 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21685 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21687 merge_two:
21688 /* Then merge them together. The key is whether any given control
21689 element contained a bit set that indicates the second word. */
21690 mask = operands[3];
21691 vt = GEN_INT (w);
21692 if (maskmode == V2DImode && !TARGET_SSE4_1)
21694 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21695 more shuffle to convert the V2DI input mask into a V4SI
21696 input mask. At which point the masking that expand_int_vcond
21697 will work as desired. */
21698 rtx t3 = gen_reg_rtx (V4SImode);
21699 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21700 const0_rtx, const0_rtx,
21701 const2_rtx, const2_rtx));
21702 mask = t3;
21703 maskmode = V4SImode;
21704 e = w = 4;
21707 for (i = 0; i < w; i++)
21708 vec[i] = vt;
21709 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21710 vt = force_reg (maskmode, vt);
21711 mask = expand_simple_binop (maskmode, AND, mask, vt,
21712 NULL_RTX, 0, OPTAB_DIRECT);
21714 if (GET_MODE (target) != mode)
21715 target = gen_reg_rtx (mode);
21716 xops[0] = target;
21717 xops[1] = gen_lowpart (mode, t2);
21718 xops[2] = gen_lowpart (mode, t1);
21719 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21720 xops[4] = mask;
21721 xops[5] = vt;
21722 ok = ix86_expand_int_vcond (xops);
21723 gcc_assert (ok);
21724 if (target != operands[0])
21725 emit_move_insn (operands[0],
21726 gen_lowpart (GET_MODE (operands[0]), target));
21730 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P is
21731 true if we should do zero extension, else sign extension. HIGH_P is
21732 true if we want the N/2 high elements, else the low elements. */
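/* For instance, a V16QImode SRC with UNSIGNED_P and !HIGH_P yields
   the low eight bytes zero extended into a V8HImode DEST (pmovzxbw
   on SSE4.1, punpcklbw against a zero register otherwise); with
   HIGH_P the high eight bytes are widened instead.  */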
21734 void
21735 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21737 enum machine_mode imode = GET_MODE (src);
21738 rtx tmp;
21740 if (TARGET_SSE4_1)
21742 rtx (*unpack)(rtx, rtx);
21743 rtx (*extract)(rtx, rtx) = NULL;
21744 enum machine_mode halfmode = BLKmode;
21746 switch (imode)
21748 case V32QImode:
21749 if (unsigned_p)
21750 unpack = gen_avx2_zero_extendv16qiv16hi2;
21751 else
21752 unpack = gen_avx2_sign_extendv16qiv16hi2;
21753 halfmode = V16QImode;
21754 extract
21755 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21756 break;
21757 case V32HImode:
21758 if (unsigned_p)
21759 unpack = gen_avx512f_zero_extendv16hiv16si2;
21760 else
21761 unpack = gen_avx512f_sign_extendv16hiv16si2;
21762 halfmode = V16HImode;
21763 extract
21764 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21765 break;
21766 case V16HImode:
21767 if (unsigned_p)
21768 unpack = gen_avx2_zero_extendv8hiv8si2;
21769 else
21770 unpack = gen_avx2_sign_extendv8hiv8si2;
21771 halfmode = V8HImode;
21772 extract
21773 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21774 break;
21775 case V16SImode:
21776 if (unsigned_p)
21777 unpack = gen_avx512f_zero_extendv8siv8di2;
21778 else
21779 unpack = gen_avx512f_sign_extendv8siv8di2;
21780 halfmode = V8SImode;
21781 extract
21782 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21783 break;
21784 case V8SImode:
21785 if (unsigned_p)
21786 unpack = gen_avx2_zero_extendv4siv4di2;
21787 else
21788 unpack = gen_avx2_sign_extendv4siv4di2;
21789 halfmode = V4SImode;
21790 extract
21791 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21792 break;
21793 case V16QImode:
21794 if (unsigned_p)
21795 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21796 else
21797 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21798 break;
21799 case V8HImode:
21800 if (unsigned_p)
21801 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21802 else
21803 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21804 break;
21805 case V4SImode:
21806 if (unsigned_p)
21807 unpack = gen_sse4_1_zero_extendv2siv2di2;
21808 else
21809 unpack = gen_sse4_1_sign_extendv2siv2di2;
21810 break;
21811 default:
21812 gcc_unreachable ();
21815 if (GET_MODE_SIZE (imode) >= 32)
21817 tmp = gen_reg_rtx (halfmode);
21818 emit_insn (extract (tmp, src));
21820 else if (high_p)
21822 /* Shift higher 8 bytes to lower 8 bytes. */
21823 tmp = gen_reg_rtx (V1TImode);
21824 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21825 GEN_INT (64)));
21826 tmp = gen_lowpart (imode, tmp);
21828 else
21829 tmp = src;
21831 emit_insn (unpack (dest, tmp));
21833 else
21835 rtx (*unpack)(rtx, rtx, rtx);
21837 switch (imode)
21839 case V16QImode:
21840 if (high_p)
21841 unpack = gen_vec_interleave_highv16qi;
21842 else
21843 unpack = gen_vec_interleave_lowv16qi;
21844 break;
21845 case V8HImode:
21846 if (high_p)
21847 unpack = gen_vec_interleave_highv8hi;
21848 else
21849 unpack = gen_vec_interleave_lowv8hi;
21850 break;
21851 case V4SImode:
21852 if (high_p)
21853 unpack = gen_vec_interleave_highv4si;
21854 else
21855 unpack = gen_vec_interleave_lowv4si;
21856 break;
21857 default:
21858 gcc_unreachable ();
21861 if (unsigned_p)
21862 tmp = force_reg (imode, CONST0_RTX (imode));
21863 else
21864 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21865 src, pc_rtx, pc_rtx);
21867 rtx tmp2 = gen_reg_rtx (imode);
21868 emit_insn (unpack (tmp2, src, tmp));
21869 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21873 /* Expand conditional increment or decrement using adc/sbb instructions.
21874 The default case using setcc followed by the conditional move can be
21875 done by generic code. */
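/* Illustration (unsigned comparison assumed): the expression
     r = (a < b) ? r + 1 : r
   can be implemented branch-free by a compare that leaves a < b in
   the carry flag followed by  adc $0, r ; the decrement form uses
   sbb the same way, and other conditions are handled by reversing
   the comparison and/or using -1 as the constant addend.  */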
21876 bool
21877 ix86_expand_int_addcc (rtx operands[])
21879 enum rtx_code code = GET_CODE (operands[1]);
21880 rtx flags;
21881 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21882 rtx compare_op;
21883 rtx val = const0_rtx;
21884 bool fpcmp = false;
21885 enum machine_mode mode;
21886 rtx op0 = XEXP (operands[1], 0);
21887 rtx op1 = XEXP (operands[1], 1);
21889 if (operands[3] != const1_rtx
21890 && operands[3] != constm1_rtx)
21891 return false;
21892 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21893 return false;
21894 code = GET_CODE (compare_op);
21896 flags = XEXP (compare_op, 0);
21898 if (GET_MODE (flags) == CCFPmode
21899 || GET_MODE (flags) == CCFPUmode)
21901 fpcmp = true;
21902 code = ix86_fp_compare_code_to_integer (code);
21905 if (code != LTU)
21907 val = constm1_rtx;
21908 if (fpcmp)
21909 PUT_CODE (compare_op,
21910 reverse_condition_maybe_unordered
21911 (GET_CODE (compare_op)));
21912 else
21913 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21916 mode = GET_MODE (operands[0]);
21918 /* Construct either adc or sbb insn. */
21919 if ((code == LTU) == (operands[3] == constm1_rtx))
21921 switch (mode)
21923 case QImode:
21924 insn = gen_subqi3_carry;
21925 break;
21926 case HImode:
21927 insn = gen_subhi3_carry;
21928 break;
21929 case SImode:
21930 insn = gen_subsi3_carry;
21931 break;
21932 case DImode:
21933 insn = gen_subdi3_carry;
21934 break;
21935 default:
21936 gcc_unreachable ();
21939 else
21941 switch (mode)
21943 case QImode:
21944 insn = gen_addqi3_carry;
21945 break;
21946 case HImode:
21947 insn = gen_addhi3_carry;
21948 break;
21949 case SImode:
21950 insn = gen_addsi3_carry;
21951 break;
21952 case DImode:
21953 insn = gen_adddi3_carry;
21954 break;
21955 default:
21956 gcc_unreachable ();
21959 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21961 return true;
21965 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21966 but works for floating point operands and non-offsettable memories.
21967 For pushes, it returns just stack offsets; the values will be saved
21968 in the right order. At most four parts are generated. */
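/* For example, on a 32-bit target a DFmode operand is returned as
   two SImode parts and an XFmode operand as three, while a 64-bit
   target splits XFmode and TFmode values into two parts.  */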
21970 static int
21971 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21973 int size;
21975 if (!TARGET_64BIT)
21976 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21977 else
21978 size = (GET_MODE_SIZE (mode) + 4) / 8;
21980 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21981 gcc_assert (size >= 2 && size <= 4);
21983 /* Optimize constant pool references to immediates. This is used by fp
21984 moves, which force all constants to memory to allow combining. */
21985 if (MEM_P (operand) && MEM_READONLY_P (operand))
21987 rtx tmp = maybe_get_pool_constant (operand);
21988 if (tmp)
21989 operand = tmp;
21992 if (MEM_P (operand) && !offsettable_memref_p (operand))
21994 /* The only non-offsettable memories we handle are pushes. */
21995 int ok = push_operand (operand, VOIDmode);
21997 gcc_assert (ok);
21999 operand = copy_rtx (operand);
22000 PUT_MODE (operand, word_mode);
22001 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22002 return size;
22005 if (GET_CODE (operand) == CONST_VECTOR)
22007 enum machine_mode imode = int_mode_for_mode (mode);
22008 /* Caution: if we looked through a constant pool memory above,
22009 the operand may actually have a different mode now. That's
22010 ok, since we want to pun this all the way back to an integer. */
22011 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22012 gcc_assert (operand != NULL);
22013 mode = imode;
22016 if (!TARGET_64BIT)
22018 if (mode == DImode)
22019 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22020 else
22022 int i;
22024 if (REG_P (operand))
22026 gcc_assert (reload_completed);
22027 for (i = 0; i < size; i++)
22028 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22030 else if (offsettable_memref_p (operand))
22032 operand = adjust_address (operand, SImode, 0);
22033 parts[0] = operand;
22034 for (i = 1; i < size; i++)
22035 parts[i] = adjust_address (operand, SImode, 4 * i);
22037 else if (GET_CODE (operand) == CONST_DOUBLE)
22039 REAL_VALUE_TYPE r;
22040 long l[4];
22042 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22043 switch (mode)
22045 case TFmode:
22046 real_to_target (l, &r, mode);
22047 parts[3] = gen_int_mode (l[3], SImode);
22048 parts[2] = gen_int_mode (l[2], SImode);
22049 break;
22050 case XFmode:
22051 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22052 long double may not be 80-bit. */
22053 real_to_target (l, &r, mode);
22054 parts[2] = gen_int_mode (l[2], SImode);
22055 break;
22056 case DFmode:
22057 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22058 break;
22059 default:
22060 gcc_unreachable ();
22062 parts[1] = gen_int_mode (l[1], SImode);
22063 parts[0] = gen_int_mode (l[0], SImode);
22065 else
22066 gcc_unreachable ();
22069 else
22071 if (mode == TImode)
22072 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22073 if (mode == XFmode || mode == TFmode)
22075 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22076 if (REG_P (operand))
22078 gcc_assert (reload_completed);
22079 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22080 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22082 else if (offsettable_memref_p (operand))
22084 operand = adjust_address (operand, DImode, 0);
22085 parts[0] = operand;
22086 parts[1] = adjust_address (operand, upper_mode, 8);
22088 else if (GET_CODE (operand) == CONST_DOUBLE)
22090 REAL_VALUE_TYPE r;
22091 long l[4];
22093 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22094 real_to_target (l, &r, mode);
22096 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22097 if (HOST_BITS_PER_WIDE_INT >= 64)
22098 parts[0]
22099 = gen_int_mode
22100 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22101 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22102 DImode);
22103 else
22104 parts[0] = immed_double_const (l[0], l[1], DImode);
22106 if (upper_mode == SImode)
22107 parts[1] = gen_int_mode (l[2], SImode);
22108 else if (HOST_BITS_PER_WIDE_INT >= 64)
22109 parts[1]
22110 = gen_int_mode
22111 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22112 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22113 DImode);
22114 else
22115 parts[1] = immed_double_const (l[2], l[3], DImode);
22117 else
22118 gcc_unreachable ();
22122 return size;
22125 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22126 All required insns are emitted by this function. Operands 2-5
22127 are used to stage the destination parts and operands 6-9 the
22128 source parts, in the correct order. */
22130 void
22131 ix86_split_long_move (rtx operands[])
22133 rtx part[2][4];
22134 int nparts, i, j;
22135 int push = 0;
22136 int collisions = 0;
22137 enum machine_mode mode = GET_MODE (operands[0]);
22138 bool collisionparts[4];
22140 /* The DFmode expanders may ask us to move a double.
22141 For a 64-bit target this is a single move. By hiding the fact
22142 here we simplify the i386.md splitters. */
22143 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22145 /* Optimize constant pool references to immediates. This is used by
22146 fp moves, which force all constants to memory to allow combining. */
22148 if (MEM_P (operands[1])
22149 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22150 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22151 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22152 if (push_operand (operands[0], VOIDmode))
22154 operands[0] = copy_rtx (operands[0]);
22155 PUT_MODE (operands[0], word_mode);
22157 else
22158 operands[0] = gen_lowpart (DImode, operands[0]);
22159 operands[1] = gen_lowpart (DImode, operands[1]);
22160 emit_move_insn (operands[0], operands[1]);
22161 return;
22164 /* The only non-offsettable memory we handle is push. */
22165 if (push_operand (operands[0], VOIDmode))
22166 push = 1;
22167 else
22168 gcc_assert (!MEM_P (operands[0])
22169 || offsettable_memref_p (operands[0]));
22171 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22172 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22174 /* When emitting push, take care for source operands on the stack. */
22175 if (push && MEM_P (operands[1])
22176 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22178 rtx src_base = XEXP (part[1][nparts - 1], 0);
22180 /* Compensate for the stack decrement by 4. */
22181 if (!TARGET_64BIT && nparts == 3
22182 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22183 src_base = plus_constant (Pmode, src_base, 4);
22185 /* src_base refers to the stack pointer and is
22186 automatically decremented by the emitted pushes. */
22187 for (i = 0; i < nparts; i++)
22188 part[1][i] = change_address (part[1][i],
22189 GET_MODE (part[1][i]), src_base);
22192 /* We need to do copy in the right order in case an address register
22193 of the source overlaps the destination. */
22194 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22196 rtx tmp;
22198 for (i = 0; i < nparts; i++)
22200 collisionparts[i]
22201 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22202 if (collisionparts[i])
22203 collisions++;
22206 /* Collision in the middle part can be handled by reordering. */
22207 if (collisions == 1 && nparts == 3 && collisionparts [1])
22209 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22210 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22212 else if (collisions == 1
22213 && nparts == 4
22214 && (collisionparts [1] || collisionparts [2]))
22216 if (collisionparts [1])
22218 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22219 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22221 else
22223 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22224 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22228 /* If there are more collisions, we can't handle it by reordering.
22229 Do an lea to the last part and use only one colliding move. */
22230 else if (collisions > 1)
22232 rtx base;
22234 collisions = 1;
22236 base = part[0][nparts - 1];
22238 /* Handle the case when the last part isn't valid for lea.
22239 Happens in 64-bit mode storing the 12-byte XFmode. */
22240 if (GET_MODE (base) != Pmode)
22241 base = gen_rtx_REG (Pmode, REGNO (base));
22243 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22244 part[1][0] = replace_equiv_address (part[1][0], base);
22245 for (i = 1; i < nparts; i++)
22247 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22248 part[1][i] = replace_equiv_address (part[1][i], tmp);
22253 if (push)
22255 if (!TARGET_64BIT)
22257 if (nparts == 3)
22259 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22260 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22261 stack_pointer_rtx, GEN_INT (-4)));
22262 emit_move_insn (part[0][2], part[1][2]);
22264 else if (nparts == 4)
22266 emit_move_insn (part[0][3], part[1][3]);
22267 emit_move_insn (part[0][2], part[1][2]);
22270 else
22272 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22273 a register, that is OK - we will just use the larger counterpart. We also
22274 retype the memory - these come from an attempt to avoid a REX prefix on
22275 moving of the second half of a TFmode value. */
22276 if (GET_MODE (part[1][1]) == SImode)
22278 switch (GET_CODE (part[1][1]))
22280 case MEM:
22281 part[1][1] = adjust_address (part[1][1], DImode, 0);
22282 break;
22284 case REG:
22285 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22286 break;
22288 default:
22289 gcc_unreachable ();
22292 if (GET_MODE (part[1][0]) == SImode)
22293 part[1][0] = part[1][1];
22296 emit_move_insn (part[0][1], part[1][1]);
22297 emit_move_insn (part[0][0], part[1][0]);
22298 return;
22301 /* Choose correct order to not overwrite the source before it is copied. */
22302 if ((REG_P (part[0][0])
22303 && REG_P (part[1][1])
22304 && (REGNO (part[0][0]) == REGNO (part[1][1])
22305 || (nparts == 3
22306 && REGNO (part[0][0]) == REGNO (part[1][2]))
22307 || (nparts == 4
22308 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22309 || (collisions > 0
22310 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22312 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22314 operands[2 + i] = part[0][j];
22315 operands[6 + i] = part[1][j];
22318 else
22320 for (i = 0; i < nparts; i++)
22322 operands[2 + i] = part[0][i];
22323 operands[6 + i] = part[1][i];
22327 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22328 if (optimize_insn_for_size_p ())
22330 for (j = 0; j < nparts - 1; j++)
22331 if (CONST_INT_P (operands[6 + j])
22332 && operands[6 + j] != const0_rtx
22333 && REG_P (operands[2 + j]))
22334 for (i = j; i < nparts - 1; i++)
22335 if (CONST_INT_P (operands[7 + i])
22336 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22337 operands[7 + i] = operands[2 + j];
22340 for (i = 0; i < nparts; i++)
22341 emit_move_insn (operands[2 + i], operands[6 + i]);
22343 return;
22346 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22347 left shift by a constant, either using a single shift or
22348 a sequence of add instructions. */
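/* E.g. a left shift of a half by 2 may come out as two
   self-additions (reg = reg + reg, twice) when that is no more
   costly than a constant shift and we are not optimizing for size;
   otherwise a single shift-by-immediate is emitted.  */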
22350 static void
22351 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22353 rtx (*insn)(rtx, rtx, rtx);
22355 if (count == 1
22356 || (count * ix86_cost->add <= ix86_cost->shift_const
22357 && !optimize_insn_for_size_p ()))
22359 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22360 while (count-- > 0)
22361 emit_insn (insn (operand, operand, operand));
22363 else
22365 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22366 emit_insn (insn (operand, operand, GEN_INT (count)));
22370 void
22371 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22373 rtx (*gen_ashl3)(rtx, rtx, rtx);
22374 rtx (*gen_shld)(rtx, rtx, rtx);
22375 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22377 rtx low[2], high[2];
22378 int count;
22380 if (CONST_INT_P (operands[2]))
22382 split_double_mode (mode, operands, 2, low, high);
22383 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22385 if (count >= half_width)
22387 emit_move_insn (high[0], low[1]);
22388 emit_move_insn (low[0], const0_rtx);
22390 if (count > half_width)
22391 ix86_expand_ashl_const (high[0], count - half_width, mode);
22393 else
22395 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22397 if (!rtx_equal_p (operands[0], operands[1]))
22398 emit_move_insn (operands[0], operands[1]);
22400 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22401 ix86_expand_ashl_const (low[0], count, mode);
22403 return;
22406 split_double_mode (mode, operands, 1, low, high);
22408 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22410 if (operands[1] == const1_rtx)
22412 /* Assuming we've chosen QImode-capable registers, 1 << N
22413 can be done with two 32/64-bit shifts, no branches, no cmoves. */
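/* Concretely, for a 64-bit 1 << N on a 32-bit target this emits
     test $32, N;  sete low;  setne high
   (after clearing both halves), followed by shifting each half
   left by N.  The 32-bit shifts mask the count to five bits, so
   the half that holds the 1 ends up shifted by N mod 32 and the
   other half stays zero.  */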
22414 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22416 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22418 ix86_expand_clear (low[0]);
22419 ix86_expand_clear (high[0]);
22420 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22422 d = gen_lowpart (QImode, low[0]);
22423 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22424 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22425 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22427 d = gen_lowpart (QImode, high[0]);
22428 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22429 s = gen_rtx_NE (QImode, flags, const0_rtx);
22430 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22433 /* Otherwise, we can get the same results by manually performing
22434 a bit extract operation on bit 5/6, and then performing the two
22435 shifts. The two methods of getting 0/1 into low/high are exactly
22436 the same size. Avoiding the shift in the bit extract case helps
22437 pentium4 a bit; no one else seems to care much either way. */
22438 else
22440 enum machine_mode half_mode;
22441 rtx (*gen_lshr3)(rtx, rtx, rtx);
22442 rtx (*gen_and3)(rtx, rtx, rtx);
22443 rtx (*gen_xor3)(rtx, rtx, rtx);
22444 HOST_WIDE_INT bits;
22445 rtx x;
22447 if (mode == DImode)
22449 half_mode = SImode;
22450 gen_lshr3 = gen_lshrsi3;
22451 gen_and3 = gen_andsi3;
22452 gen_xor3 = gen_xorsi3;
22453 bits = 5;
22455 else
22457 half_mode = DImode;
22458 gen_lshr3 = gen_lshrdi3;
22459 gen_and3 = gen_anddi3;
22460 gen_xor3 = gen_xordi3;
22461 bits = 6;
22464 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22465 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22466 else
22467 x = gen_lowpart (half_mode, operands[2]);
22468 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22470 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22471 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22472 emit_move_insn (low[0], high[0]);
22473 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22476 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22477 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22478 return;
22481 if (operands[1] == constm1_rtx)
22483 /* For -1 << N, we can avoid the shld instruction, because we
22484 know that we're shifting 0...31/63 ones into a -1. */
22485 emit_move_insn (low[0], constm1_rtx);
22486 if (optimize_insn_for_size_p ())
22487 emit_move_insn (high[0], low[0]);
22488 else
22489 emit_move_insn (high[0], constm1_rtx);
22491 else
22493 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22495 if (!rtx_equal_p (operands[0], operands[1]))
22496 emit_move_insn (operands[0], operands[1]);
22498 split_double_mode (mode, operands, 1, low, high);
22499 emit_insn (gen_shld (high[0], low[0], operands[2]));
22502 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22504 if (TARGET_CMOVE && scratch)
22506 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22507 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22509 ix86_expand_clear (scratch);
22510 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22512 else
22514 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22515 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22517 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22521 void
22522 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22524 rtx (*gen_ashr3)(rtx, rtx, rtx)
22525 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22526 rtx (*gen_shrd)(rtx, rtx, rtx);
22527 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22529 rtx low[2], high[2];
22530 int count;
22532 if (CONST_INT_P (operands[2]))
22534 split_double_mode (mode, operands, 2, low, high);
22535 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22537 if (count == GET_MODE_BITSIZE (mode) - 1)
22539 emit_move_insn (high[0], high[1]);
22540 emit_insn (gen_ashr3 (high[0], high[0],
22541 GEN_INT (half_width - 1)));
22542 emit_move_insn (low[0], high[0]);
22545 else if (count >= half_width)
22547 emit_move_insn (low[0], high[1]);
22548 emit_move_insn (high[0], low[0]);
22549 emit_insn (gen_ashr3 (high[0], high[0],
22550 GEN_INT (half_width - 1)));
22552 if (count > half_width)
22553 emit_insn (gen_ashr3 (low[0], low[0],
22554 GEN_INT (count - half_width)));
22556 else
22558 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22560 if (!rtx_equal_p (operands[0], operands[1]))
22561 emit_move_insn (operands[0], operands[1]);
22563 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22564 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22567 else
22569 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22571 if (!rtx_equal_p (operands[0], operands[1]))
22572 emit_move_insn (operands[0], operands[1]);
22574 split_double_mode (mode, operands, 1, low, high);
22576 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22577 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22579 if (TARGET_CMOVE && scratch)
22581 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22582 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22584 emit_move_insn (scratch, high[0]);
22585 emit_insn (gen_ashr3 (scratch, scratch,
22586 GEN_INT (half_width - 1)));
22587 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22588 scratch));
22590 else
22592 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22593 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22595 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22600 void
22601 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22603 rtx (*gen_lshr3)(rtx, rtx, rtx)
22604 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22605 rtx (*gen_shrd)(rtx, rtx, rtx);
22606 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22608 rtx low[2], high[2];
22609 int count;
22611 if (CONST_INT_P (operands[2]))
22613 split_double_mode (mode, operands, 2, low, high);
22614 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22616 if (count >= half_width)
22618 emit_move_insn (low[0], high[1]);
22619 ix86_expand_clear (high[0]);
22621 if (count > half_width)
22622 emit_insn (gen_lshr3 (low[0], low[0],
22623 GEN_INT (count - half_width)));
22625 else
22627 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22629 if (!rtx_equal_p (operands[0], operands[1]))
22630 emit_move_insn (operands[0], operands[1]);
22632 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22633 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22636 else
22638 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22640 if (!rtx_equal_p (operands[0], operands[1]))
22641 emit_move_insn (operands[0], operands[1]);
22643 split_double_mode (mode, operands, 1, low, high);
22645 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22646 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22648 if (TARGET_CMOVE && scratch)
22650 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22651 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22653 ix86_expand_clear (scratch);
22654 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22655 scratch));
22657 else
22659 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22660 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22662 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22667 /* Predict just emitted jump instruction to be taken with probability PROB. */
22668 static void
22669 predict_jump (int prob)
22671 rtx insn = get_last_insn ();
22672 gcc_assert (JUMP_P (insn));
22673 add_int_reg_note (insn, REG_BR_PROB, prob);
22676 /* Helper function for the string operations below. Test whether VARIABLE
22677 is aligned to VALUE bytes; if so, jump to the label that is returned. */
22678 static rtx
22679 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22681 rtx label = gen_label_rtx ();
22682 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22683 if (GET_MODE (variable) == DImode)
22684 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22685 else
22686 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22687 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22688 1, label);
22689 if (epilogue)
22690 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22691 else
22692 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22693 return label;
22696 /* Decrease COUNTREG by VALUE. */
22697 static void
22698 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22700 rtx (*gen_add)(rtx, rtx, rtx)
22701 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22703 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22706 /* Zero extend possibly SImode EXP to Pmode register. */
22708 ix86_zero_extend_to_Pmode (rtx exp)
22710 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22713 /* Divide COUNTREG by SCALE. */
22714 static rtx
22715 scale_counter (rtx countreg, int scale)
22717 rtx sc;
22719 if (scale == 1)
22720 return countreg;
22721 if (CONST_INT_P (countreg))
22722 return GEN_INT (INTVAL (countreg) / scale);
22723 gcc_assert (REG_P (countreg));
22725 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22726 GEN_INT (exact_log2 (scale)),
22727 NULL, 1, OPTAB_DIRECT);
22728 return sc;
22731 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22732 DImode for constant loop counts. */
22734 static enum machine_mode
22735 counter_mode (rtx count_exp)
22737 if (GET_MODE (count_exp) != VOIDmode)
22738 return GET_MODE (count_exp);
22739 if (!CONST_INT_P (count_exp))
22740 return Pmode;
22741 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22742 return DImode;
22743 return SImode;
22746 /* Copy the address to a Pmode register. This is used for x32 to
22747 truncate DImode TLS address to a SImode register. */
22749 static rtx
22750 ix86_copy_addr_to_reg (rtx addr)
22752 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22753 return copy_addr_to_reg (addr);
22754 else
22756 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22757 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22761 /* When ISSETMEM is FALSE, output a simple loop copying memory from SRCPTR
22762 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is COUNT,
22763 specified in bytes.  When ISSETMEM is TRUE, output the equivalent loop setting
22764 memory to VALUE (which is supposed to be in MODE).
22766 The size is rounded down to a whole number of chunks moved at once.
22767 SRCMEM and DESTMEM provide the MEM rtxes used to supply proper aliasing info.  */
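/* As a concrete illustration: with MODE == SImode and UNROLL == 4 the
   chunk size is 16 bytes, SIZE below is COUNT & ~15, the loop copies
   (or sets) 16 bytes per iteration, and up to 15 trailing bytes are
   left for the caller's epilogue.  */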
22770 static void
22771 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22772 rtx destptr, rtx srcptr, rtx value,
22773 rtx count, enum machine_mode mode, int unroll,
22774 int expected_size, bool issetmem)
22776 rtx out_label, top_label, iter, tmp;
22777 enum machine_mode iter_mode = counter_mode (count);
22778 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22779 rtx piece_size = GEN_INT (piece_size_n);
22780 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22781 rtx size;
22782 int i;
22784 top_label = gen_label_rtx ();
22785 out_label = gen_label_rtx ();
22786 iter = gen_reg_rtx (iter_mode);
22788 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22789 NULL, 1, OPTAB_DIRECT);
22790 /* Those two should combine. */
22791 if (piece_size == const1_rtx)
22793 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22794 true, out_label);
22795 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22797 emit_move_insn (iter, const0_rtx);
22799 emit_label (top_label);
22801 tmp = convert_modes (Pmode, iter_mode, iter, true);
22803 /* This assert could be relaxed - in that case we'd need to compute the
22804 smallest power of two containing PIECE_SIZE_N and pass it to
22805 offset_address.  */
22806 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22807 destmem = offset_address (destmem, tmp, piece_size_n);
22808 destmem = adjust_address (destmem, mode, 0);
22810 if (!issetmem)
22812 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22813 srcmem = adjust_address (srcmem, mode, 0);
22815 /* When unrolling for chips that reorder memory reads and writes,
22816 we can save registers by using a single temporary.
22817 Also, using 4 temporaries is overkill in 32-bit mode.  */
22818 if (!TARGET_64BIT && 0)
22820 for (i = 0; i < unroll; i++)
22822 if (i)
22824 destmem =
22825 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22826 srcmem =
22827 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22829 emit_move_insn (destmem, srcmem);
22832 else
22834 rtx tmpreg[4];
22835 gcc_assert (unroll <= 4);
22836 for (i = 0; i < unroll; i++)
22838 tmpreg[i] = gen_reg_rtx (mode);
22839 if (i)
22841 srcmem =
22842 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22844 emit_move_insn (tmpreg[i], srcmem);
22846 for (i = 0; i < unroll; i++)
22848 if (i)
22850 destmem =
22851 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22853 emit_move_insn (destmem, tmpreg[i]);
22857 else
22858 for (i = 0; i < unroll; i++)
22860 if (i)
22861 destmem =
22862 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22863 emit_move_insn (destmem, value);
22866 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22867 true, OPTAB_LIB_WIDEN);
22868 if (tmp != iter)
22869 emit_move_insn (iter, tmp);
22871 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22872 true, top_label);
22873 if (expected_size != -1)
22875 expected_size /= GET_MODE_SIZE (mode) * unroll;
22876 if (expected_size == 0)
22877 predict_jump (0);
22878 else if (expected_size > REG_BR_PROB_BASE)
22879 predict_jump (REG_BR_PROB_BASE - 1);
22880 else
22881 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22883 else
22884 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22885 iter = ix86_zero_extend_to_Pmode (iter);
22886 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22887 true, OPTAB_LIB_WIDEN);
22888 if (tmp != destptr)
22889 emit_move_insn (destptr, tmp);
22890 if (!issetmem)
22892 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22893 true, OPTAB_LIB_WIDEN);
22894 if (tmp != srcptr)
22895 emit_move_insn (srcptr, tmp);
22897 emit_label (out_label);
22900 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22901 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22902 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22903 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22904 ORIG_VALUE is the original value passed to memset to fill the memory with.
22905 Other arguments have the same meaning as for the previous function.  */
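/* For instance, with MODE == SImode the count register is loaded with
   COUNT / 4 and DESTEXP becomes (destptr + (countreg << 2)), i.e. the
   value the destination pointer holds once the rep movsd/stosd has
   finished.  */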
22907 static void
22908 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22909 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22910 rtx count,
22911 enum machine_mode mode, bool issetmem)
22913 rtx destexp;
22914 rtx srcexp;
22915 rtx countreg;
22916 HOST_WIDE_INT rounded_count;
22918 /* If possible, it is shorter to use rep movs.
22919 TODO: Maybe it is better to move this logic to decide_alg. */
22920 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22921 && (!issetmem || orig_value == const0_rtx))
22922 mode = SImode;
22924 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22925 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22927 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22928 GET_MODE_SIZE (mode)));
22929 if (mode != QImode)
22931 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22932 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22933 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22935 else
22936 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22937 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22939 rounded_count = (INTVAL (count)
22940 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22941 destmem = shallow_copy_rtx (destmem);
22942 set_mem_size (destmem, rounded_count);
22944 else if (MEM_SIZE_KNOWN_P (destmem))
22945 clear_mem_size (destmem);
22947 if (issetmem)
22949 value = force_reg (mode, gen_lowpart (mode, value));
22950 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22952 else
22954 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22955 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22956 if (mode != QImode)
22958 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22959 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22960 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22962 else
22963 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22964 if (CONST_INT_P (count))
22966 rounded_count = (INTVAL (count)
22967 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22968 srcmem = shallow_copy_rtx (srcmem);
22969 set_mem_size (srcmem, rounded_count);
22971 else
22973 if (MEM_SIZE_KNOWN_P (srcmem))
22974 clear_mem_size (srcmem);
22976 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22977 destexp, srcexp));
22981 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22982 DESTMEM.
22983 SRCMEM is passed by pointer so it can be updated on return.
22984 The return value is the updated DESTMEM.  */
22985 static rtx
22986 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22987 HOST_WIDE_INT size_to_move)
22989 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22990 enum insn_code code;
22991 enum machine_mode move_mode;
22992 int piece_size, i;
22994 /* Find the widest mode in which we could perform moves.
22995 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
22996 it until a move of that size is supported.  */
22997 piece_size = 1 << floor_log2 (size_to_move);
22998 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22999 code = optab_handler (mov_optab, move_mode);
23000 while (code == CODE_FOR_nothing && piece_size > 1)
23002 piece_size >>= 1;
23003 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23004 code = optab_handler (mov_optab, move_mode);
23007 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23008 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23009 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23011 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23012 move_mode = mode_for_vector (word_mode, nunits);
23013 code = optab_handler (mov_optab, move_mode);
23014 if (code == CODE_FOR_nothing)
23016 move_mode = word_mode;
23017 piece_size = GET_MODE_SIZE (move_mode);
23018 code = optab_handler (mov_optab, move_mode);
23021 gcc_assert (code != CODE_FOR_nothing);
23023 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23024 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23026 /* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE moves.  */
23027 gcc_assert (size_to_move % piece_size == 0);
23028 adjust = GEN_INT (piece_size);
23029 for (i = 0; i < size_to_move; i += piece_size)
23031 /* We move from memory to memory, so we'll need to do it via
23032 a temporary register. */
23033 tempreg = gen_reg_rtx (move_mode);
23034 emit_insn (GEN_FCN (code) (tempreg, src));
23035 emit_insn (GEN_FCN (code) (dst, tempreg));
23037 emit_move_insn (destptr,
23038 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23039 emit_move_insn (srcptr,
23040 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23042 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23043 piece_size);
23044 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23045 piece_size);
23048 /* Update DST and SRC rtx. */
23049 *srcmem = src;
23050 return dst;
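/* For example, a call with SIZE_TO_MOVE == 4 (SImode moves are always
   available) emits one SImode load into a fresh register, one SImode
   store, and advances both DESTPTR and SRCPTR by 4.  */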
23053 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23054 static void
23055 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23056 rtx destptr, rtx srcptr, rtx count, int max_size)
23058 rtx src, dest;
23059 if (CONST_INT_P (count))
23061 HOST_WIDE_INT countval = INTVAL (count);
23062 HOST_WIDE_INT epilogue_size = countval % max_size;
23063 int i;
23065 /* For now MAX_SIZE should be a power of 2. This assert could be
23066 relaxed, but it'll require a bit more complicated epilogue
23067 expanding. */
23068 gcc_assert ((max_size & (max_size - 1)) == 0);
23069 for (i = max_size; i >= 1; i >>= 1)
23071 if (epilogue_size & i)
23072 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23074 return;
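/* E.g. a constant COUNT of 7 with MAX_SIZE == 16 gives epilogue_size 7,
   so the loop above emits a 4-byte, a 2-byte and a 1-byte move.  */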
23076 if (max_size > 8)
23078 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23079 count, 1, OPTAB_DIRECT);
23080 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23081 count, QImode, 1, 4, false);
23082 return;
23085 /* When single stringop instructions are available, we can cheaply advance
23086 the dest and src pointers.  Otherwise we save code size by maintaining an
23087 offset (zero is readily available from the preceding rep operation) and using x86 addressing modes.  */
23089 if (TARGET_SINGLE_STRINGOP)
23091 if (max_size > 4)
23093 rtx label = ix86_expand_aligntest (count, 4, true);
23094 src = change_address (srcmem, SImode, srcptr);
23095 dest = change_address (destmem, SImode, destptr);
23096 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23097 emit_label (label);
23098 LABEL_NUSES (label) = 1;
23100 if (max_size > 2)
23102 rtx label = ix86_expand_aligntest (count, 2, true);
23103 src = change_address (srcmem, HImode, srcptr);
23104 dest = change_address (destmem, HImode, destptr);
23105 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23106 emit_label (label);
23107 LABEL_NUSES (label) = 1;
23109 if (max_size > 1)
23111 rtx label = ix86_expand_aligntest (count, 1, true);
23112 src = change_address (srcmem, QImode, srcptr);
23113 dest = change_address (destmem, QImode, destptr);
23114 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23115 emit_label (label);
23116 LABEL_NUSES (label) = 1;
23119 else
23121 rtx offset = force_reg (Pmode, const0_rtx);
23122 rtx tmp;
23124 if (max_size > 4)
23126 rtx label = ix86_expand_aligntest (count, 4, true);
23127 src = change_address (srcmem, SImode, srcptr);
23128 dest = change_address (destmem, SImode, destptr);
23129 emit_move_insn (dest, src);
23130 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23131 true, OPTAB_LIB_WIDEN);
23132 if (tmp != offset)
23133 emit_move_insn (offset, tmp);
23134 emit_label (label);
23135 LABEL_NUSES (label) = 1;
23137 if (max_size > 2)
23139 rtx label = ix86_expand_aligntest (count, 2, true);
23140 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23141 src = change_address (srcmem, HImode, tmp);
23142 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23143 dest = change_address (destmem, HImode, tmp);
23144 emit_move_insn (dest, src);
23145 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23146 true, OPTAB_LIB_WIDEN);
23147 if (tmp != offset)
23148 emit_move_insn (offset, tmp);
23149 emit_label (label);
23150 LABEL_NUSES (label) = 1;
23152 if (max_size > 1)
23154 rtx label = ix86_expand_aligntest (count, 1, true);
23155 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23156 src = change_address (srcmem, QImode, tmp);
23157 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23158 dest = change_address (destmem, QImode, tmp);
23159 emit_move_insn (dest, src);
23160 emit_label (label);
23161 LABEL_NUSES (label) = 1;
23166 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
23167 with the value PROMOTED_VAL.
23169 The return value is the updated DESTMEM.  */
23170 static rtx
23171 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23172 HOST_WIDE_INT size_to_move)
23174 rtx dst = destmem, adjust;
23175 enum insn_code code;
23176 enum machine_mode move_mode;
23177 int piece_size, i;
23179 /* Find the widest mode in which we could perform moves.
23180 Start from the mode of PROMOTED_VAL and fall back to a narrower
23181 integer mode when SIZE_TO_MOVE is smaller than that mode's size.  */
23182 move_mode = GET_MODE (promoted_val);
23183 if (move_mode == VOIDmode)
23184 move_mode = QImode;
23185 if (size_to_move < GET_MODE_SIZE (move_mode))
23187 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23188 promoted_val = gen_lowpart (move_mode, promoted_val);
23190 piece_size = GET_MODE_SIZE (move_mode);
23191 code = optab_handler (mov_optab, move_mode);
23192 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23194 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23196 /* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE moves.  */
23197 gcc_assert (size_to_move % piece_size == 0);
23198 adjust = GEN_INT (piece_size);
23199 for (i = 0; i < size_to_move; i += piece_size)
23201 if (piece_size <= GET_MODE_SIZE (word_mode))
23203 emit_insn (gen_strset (destptr, dst, promoted_val));
23204 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23205 piece_size);
23206 continue;
23209 emit_insn (GEN_FCN (code) (dst, promoted_val));
23211 emit_move_insn (destptr,
23212 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23214 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23215 piece_size);
23218 /* Update DST rtx. */
23219 return dst;
23221 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23222 static void
23223 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23224 rtx count, int max_size)
23226 count =
23227 expand_simple_binop (counter_mode (count), AND, count,
23228 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23229 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23230 gen_lowpart (QImode, value), count, QImode,
23231 1, max_size / 2, true);
23234 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23235 static void
23236 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23237 rtx count, int max_size)
23239 rtx dest;
23241 if (CONST_INT_P (count))
23243 HOST_WIDE_INT countval = INTVAL (count);
23244 HOST_WIDE_INT epilogue_size = countval % max_size;
23245 int i;
23247 /* For now MAX_SIZE should be a power of 2. This assert could be
23248 relaxed, but it'll require a bit more complicated epilogue
23249 expanding. */
23250 gcc_assert ((max_size & (max_size - 1)) == 0);
23251 for (i = max_size; i >= 1; i >>= 1)
23253 if (epilogue_size & i)
23255 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23256 destmem = emit_memset (destmem, destptr, vec_value, i);
23257 else
23258 destmem = emit_memset (destmem, destptr, value, i);
23261 return;
23263 if (max_size > 32)
23265 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23266 return;
23268 if (max_size > 16)
23270 rtx label = ix86_expand_aligntest (count, 16, true);
23271 if (TARGET_64BIT)
23273 dest = change_address (destmem, DImode, destptr);
23274 emit_insn (gen_strset (destptr, dest, value));
23275 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23276 emit_insn (gen_strset (destptr, dest, value));
23278 else
23280 dest = change_address (destmem, SImode, destptr);
23281 emit_insn (gen_strset (destptr, dest, value));
23282 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23283 emit_insn (gen_strset (destptr, dest, value));
23284 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23285 emit_insn (gen_strset (destptr, dest, value));
23286 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23287 emit_insn (gen_strset (destptr, dest, value));
23289 emit_label (label);
23290 LABEL_NUSES (label) = 1;
23292 if (max_size > 8)
23294 rtx label = ix86_expand_aligntest (count, 8, true);
23295 if (TARGET_64BIT)
23297 dest = change_address (destmem, DImode, destptr);
23298 emit_insn (gen_strset (destptr, dest, value));
23300 else
23302 dest = change_address (destmem, SImode, destptr);
23303 emit_insn (gen_strset (destptr, dest, value));
23304 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23305 emit_insn (gen_strset (destptr, dest, value));
23307 emit_label (label);
23308 LABEL_NUSES (label) = 1;
23310 if (max_size > 4)
23312 rtx label = ix86_expand_aligntest (count, 4, true);
23313 dest = change_address (destmem, SImode, destptr);
23314 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23315 emit_label (label);
23316 LABEL_NUSES (label) = 1;
23318 if (max_size > 2)
23320 rtx label = ix86_expand_aligntest (count, 2, true);
23321 dest = change_address (destmem, HImode, destptr);
23322 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23323 emit_label (label);
23324 LABEL_NUSES (label) = 1;
23326 if (max_size > 1)
23328 rtx label = ix86_expand_aligntest (count, 1, true);
23329 dest = change_address (destmem, QImode, destptr);
23330 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23331 emit_label (label);
23332 LABEL_NUSES (label) = 1;
23336 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
23337 enough bytes to DESTMEM, to align DESTMEM to DESIRED_ALIGNMENT.  The original alignment is ALIGN.
23338 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23339 ignored.
23340 The return value is the updated DESTMEM.  */
23341 static rtx
23342 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23343 rtx destptr, rtx srcptr, rtx value,
23344 rtx vec_value, rtx count, int align,
23345 int desired_alignment, bool issetmem)
23347 int i;
23348 for (i = 1; i < desired_alignment; i <<= 1)
23350 if (align <= i)
23352 rtx label = ix86_expand_aligntest (destptr, i, false);
23353 if (issetmem)
23355 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23356 destmem = emit_memset (destmem, destptr, vec_value, i);
23357 else
23358 destmem = emit_memset (destmem, destptr, value, i);
23360 else
23361 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23362 ix86_adjust_counter (count, i);
23363 emit_label (label);
23364 LABEL_NUSES (label) = 1;
23365 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23368 return destmem;
23371 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23372 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23373 and jump to DONE_LABEL. */
23374 static void
23375 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23376 rtx destptr, rtx srcptr,
23377 rtx value, rtx vec_value,
23378 rtx count, int size,
23379 rtx done_label, bool issetmem)
23381 rtx label = ix86_expand_aligntest (count, size, false);
23382 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23383 rtx modesize;
23384 int n;
23386 /* If we do not have a vector value to copy, we must reduce the size.  */
23387 if (issetmem)
23389 if (!vec_value)
23391 if (GET_MODE (value) == VOIDmode && size > 8)
23392 mode = Pmode;
23393 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23394 mode = GET_MODE (value);
23396 else
23397 mode = GET_MODE (vec_value), value = vec_value;
23399 else
23401 /* Choose appropriate vector mode. */
23402 if (size >= 32)
23403 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23404 else if (size >= 16)
23405 mode = TARGET_SSE ? V16QImode : DImode;
23406 srcmem = change_address (srcmem, mode, srcptr);
23408 destmem = change_address (destmem, mode, destptr);
23409 modesize = GEN_INT (GET_MODE_SIZE (mode));
23410 gcc_assert (GET_MODE_SIZE (mode) <= size);
23411 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23413 if (issetmem)
23414 emit_move_insn (destmem, gen_lowpart (mode, value));
23415 else
23417 emit_move_insn (destmem, srcmem);
23418 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23420 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23423 destmem = offset_address (destmem, count, 1);
23424 destmem = offset_address (destmem, GEN_INT (-2 * size),
23425 GET_MODE_SIZE (mode));
23426 if (!issetmem)
23428 srcmem = offset_address (srcmem, count, 1);
23429 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23430 GET_MODE_SIZE (mode));
23432 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23434 if (issetmem)
23435 emit_move_insn (destmem, gen_lowpart (mode, value));
23436 else
23438 emit_move_insn (destmem, srcmem);
23439 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23441 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23443 emit_jump_insn (gen_jump (done_label));
23444 emit_barrier ();
23446 emit_label (label);
23447 LABEL_NUSES (label) = 1;
23450 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23451 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23452 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
23453 proceed with a loop copying SIZE bytes at once.  Do the moves in MODE.
23454 DONE_LABEL is a label after the whole copying sequence. The label is created
23455 on demand if *DONE_LABEL is NULL.
23456 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23457 bounds after the initial copies.
23459 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23460 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23461 we will dispatch to a library call for large blocks.
23463 In pseudocode we do:
23465 if (COUNT < SIZE)
23467 Assume that SIZE is 4. Bigger sizes are handled analogously
23468 if (COUNT & 4)
23470 copy 4 bytes from SRCPTR to DESTPTR
23471 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23472 goto done_label
23474 if (!COUNT)
23475 goto done_label;
23476 copy 1 byte from SRCPTR to DESTPTR
23477 if (COUNT & 2)
23479 copy 2 bytes from SRCPTR to DESTPTR
23480 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23483 else
23485 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23486 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23488 OLD_DESTPTR = DESTPTR;
23489 Align DESTPTR up to DESIRED_ALIGN
23490 SRCPTR += DESTPTR - OLD_DESTPTR
23491 COUNT -= DESTPTR - OLD_DESTPTR
23492 if (DYNAMIC_CHECK)
23493 Round COUNT down to multiple of SIZE
23494 << optional caller supplied zero size guard is here >>
23495 << optional caller supplied dynamic check is here >>
23496 << caller supplied main copy loop is here >>
23498 done_label:
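   As an illustration of the small-block path: with an inner copy size of 4
   and a runtime COUNT of 6, the (COUNT & 4) test fires, 4 bytes are copied
   from the start of the block and another 4 ending at offset COUNT (the two
   copies overlap by 2 bytes, which is harmless), and control transfers to
   done_label.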
23500 static void
23501 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23502 rtx *destptr, rtx *srcptr,
23503 enum machine_mode mode,
23504 rtx value, rtx vec_value,
23505 rtx *count,
23506 rtx *done_label,
23507 int size,
23508 int desired_align,
23509 int align,
23510 unsigned HOST_WIDE_INT *min_size,
23511 bool dynamic_check,
23512 bool issetmem)
23514 rtx loop_label = NULL, label;
23515 int n;
23516 rtx modesize;
23517 int prolog_size = 0;
23518 rtx mode_value;
23520 /* Choose the proper value to copy.  */
23521 if (issetmem && VECTOR_MODE_P (mode))
23522 mode_value = vec_value;
23523 else
23524 mode_value = value;
23525 gcc_assert (GET_MODE_SIZE (mode) <= size);
23527 /* See if block is big or small, handle small blocks. */
23528 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23530 int size2 = size;
23531 loop_label = gen_label_rtx ();
23533 if (!*done_label)
23534 *done_label = gen_label_rtx ();
23536 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23537 1, loop_label);
23538 size2 >>= 1;
23540 /* Handle sizes > 3. */
23541 for (;size2 > 2; size2 >>= 1)
23542 expand_small_movmem_or_setmem (destmem, srcmem,
23543 *destptr, *srcptr,
23544 value, vec_value,
23545 *count,
23546 size2, *done_label, issetmem);
23547 /* Nothing to copy? Jump to DONE_LABEL if so */
23548 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23549 1, *done_label);
23551 /* Do a byte copy. */
23552 destmem = change_address (destmem, QImode, *destptr);
23553 if (issetmem)
23554 emit_move_insn (destmem, gen_lowpart (QImode, value));
23555 else
23557 srcmem = change_address (srcmem, QImode, *srcptr);
23558 emit_move_insn (destmem, srcmem);
23561 /* Handle sizes 2 and 3. */
23562 label = ix86_expand_aligntest (*count, 2, false);
23563 destmem = change_address (destmem, HImode, *destptr);
23564 destmem = offset_address (destmem, *count, 1);
23565 destmem = offset_address (destmem, GEN_INT (-2), 2);
23566 if (issetmem)
23567 emit_move_insn (destmem, gen_lowpart (HImode, value));
23568 else
23570 srcmem = change_address (srcmem, HImode, *srcptr);
23571 srcmem = offset_address (srcmem, *count, 1);
23572 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23573 emit_move_insn (destmem, srcmem);
23576 emit_label (label);
23577 LABEL_NUSES (label) = 1;
23578 emit_jump_insn (gen_jump (*done_label));
23579 emit_barrier ();
23581 else
23582 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23583 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23585 /* Start memcpy for COUNT >= SIZE. */
23586 if (loop_label)
23588 emit_label (loop_label);
23589 LABEL_NUSES (loop_label) = 1;
23592 /* Copy the first DESIRED_ALIGN - ALIGN bytes (rounded up to whole chunks).  */
23593 if (!issetmem)
23594 srcmem = change_address (srcmem, mode, *srcptr);
23595 destmem = change_address (destmem, mode, *destptr);
23596 modesize = GEN_INT (GET_MODE_SIZE (mode));
23597 for (n = 0; prolog_size < desired_align - align; n++)
23599 if (issetmem)
23600 emit_move_insn (destmem, mode_value);
23601 else
23603 emit_move_insn (destmem, srcmem);
23604 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23606 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23607 prolog_size += GET_MODE_SIZE (mode);
23611 /* Copy last SIZE bytes. */
23612 destmem = offset_address (destmem, *count, 1);
23613 destmem = offset_address (destmem,
23614 GEN_INT (-size - prolog_size),
23616 if (issetmem)
23617 emit_move_insn (destmem, mode_value);
23618 else
23620 srcmem = offset_address (srcmem, *count, 1);
23621 srcmem = offset_address (srcmem,
23622 GEN_INT (-size - prolog_size),
23624 emit_move_insn (destmem, srcmem);
23626 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23628 destmem = offset_address (destmem, modesize, 1);
23629 if (issetmem)
23630 emit_move_insn (destmem, mode_value);
23631 else
23633 srcmem = offset_address (srcmem, modesize, 1);
23634 emit_move_insn (destmem, srcmem);
23638 /* Align destination. */
23639 if (desired_align > 1 && desired_align > align)
23641 rtx saveddest = *destptr;
23643 gcc_assert (desired_align <= size);
23644 /* Align destptr up, place it to new register. */
23645 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23646 GEN_INT (prolog_size),
23647 NULL_RTX, 1, OPTAB_DIRECT);
23648 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23649 GEN_INT (-desired_align),
23650 *destptr, 1, OPTAB_DIRECT);
23651 /* See how many bytes we skipped. */
23652 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23653 *destptr,
23654 saveddest, 1, OPTAB_DIRECT);
23655 /* Adjust srcptr and count. */
23656 if (!issetmem)
23657 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23658 *srcptr, 1, OPTAB_DIRECT);
23659 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23660 saveddest, *count, 1, OPTAB_DIRECT);
23661 /* We copied at most size + prolog_size. */
23662 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23663 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23664 else
23665 *min_size = 0;
23667 /* Our loops always round down the block size, but for dispatch to a library
23668 call we need the precise value.  */
23669 if (dynamic_check)
23670 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23671 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23673 else
23675 gcc_assert (prolog_size == 0);
23676 /* Decrease count, so we won't end up copying last word twice. */
23677 if (!CONST_INT_P (*count))
23678 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23679 constm1_rtx, *count, 1, OPTAB_DIRECT);
23680 else
23681 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23682 if (*min_size)
23683 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23688 /* This function is like the previous one, except here we know how many bytes
23689 need to be copied. That allows us to update alignment not only of DST, which
23690 is returned, but also of SRC, which is passed as a pointer for that
23691 reason. */
23692 static rtx
23693 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23694 rtx srcreg, rtx value, rtx vec_value,
23695 int desired_align, int align_bytes,
23696 bool issetmem)
23698 rtx src = NULL;
23699 rtx orig_dst = dst;
23700 rtx orig_src = NULL;
23701 int piece_size = 1;
23702 int copied_bytes = 0;
23704 if (!issetmem)
23706 gcc_assert (srcp != NULL);
23707 src = *srcp;
23708 orig_src = src;
23711 for (piece_size = 1;
23712 piece_size <= desired_align && copied_bytes < align_bytes;
23713 piece_size <<= 1)
23715 if (align_bytes & piece_size)
23717 if (issetmem)
23719 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23720 dst = emit_memset (dst, destreg, vec_value, piece_size);
23721 else
23722 dst = emit_memset (dst, destreg, value, piece_size);
23724 else
23725 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23726 copied_bytes += piece_size;
23729 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23730 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23731 if (MEM_SIZE_KNOWN_P (orig_dst))
23732 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23734 if (!issetmem)
23736 int src_align_bytes = get_mem_align_offset (src, desired_align
23737 * BITS_PER_UNIT);
23738 if (src_align_bytes >= 0)
23739 src_align_bytes = desired_align - src_align_bytes;
23740 if (src_align_bytes >= 0)
23742 unsigned int src_align;
23743 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23745 if ((src_align_bytes & (src_align - 1))
23746 == (align_bytes & (src_align - 1)))
23747 break;
23749 if (src_align > (unsigned int) desired_align)
23750 src_align = desired_align;
23751 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23752 set_mem_align (src, src_align * BITS_PER_UNIT);
23754 if (MEM_SIZE_KNOWN_P (orig_src))
23755 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23756 *srcp = src;
23759 return dst;
23762 /* Return true if ALG can be used in current context.
23763 Assume we expand memset if MEMSET is true. */
23764 static bool
23765 alg_usable_p (enum stringop_alg alg, bool memset)
23767 if (alg == no_stringop)
23768 return false;
23769 if (alg == vector_loop)
23770 return TARGET_SSE || TARGET_AVX;
23771 /* Algorithms using the rep prefix want at least edi and ecx;
23772 additionally, memset wants eax and memcpy wants esi. Don't
23773 consider such algorithms if the user has appropriated those
23774 registers for their own purposes. */
23775 if (alg == rep_prefix_1_byte
23776 || alg == rep_prefix_4_byte
23777 || alg == rep_prefix_8_byte)
23778 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23779 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23780 return true;
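/* For example, if the user has made %ecx fixed (e.g. via -ffixed-ecx),
   every rep_prefix variant is rejected here; a fixed %eax additionally
   disables them for memset and a fixed %esi for memcpy.  */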
23783 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23784 static enum stringop_alg
23785 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23786 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23787 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23789 const struct stringop_algs * algs;
23790 bool optimize_for_speed;
23791 int max = -1;
23792 const struct processor_costs *cost;
23793 int i;
23794 bool any_alg_usable_p = false;
23796 *noalign = false;
23797 *dynamic_check = -1;
23799 /* Even if the string operation call is cold, we still might spend a lot
23800 of time processing large blocks. */
23801 if (optimize_function_for_size_p (cfun)
23802 || (optimize_insn_for_size_p ()
23803 && (max_size < 256
23804 || (expected_size != -1 && expected_size < 256))))
23805 optimize_for_speed = false;
23806 else
23807 optimize_for_speed = true;
23809 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23810 if (memset)
23811 algs = &cost->memset[TARGET_64BIT != 0];
23812 else
23813 algs = &cost->memcpy[TARGET_64BIT != 0];
23815 /* See maximal size for user defined algorithm. */
23816 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23818 enum stringop_alg candidate = algs->size[i].alg;
23819 bool usable = alg_usable_p (candidate, memset);
23820 any_alg_usable_p |= usable;
23822 if (candidate != libcall && candidate && usable)
23823 max = algs->size[i].max;
23826 /* If expected size is not known but max size is small enough
23827 so inline version is a win, set expected size into
23828 the range. */
23829 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23830 && expected_size == -1)
23831 expected_size = min_size / 2 + max_size / 2;
23833 /* If the user specified the algorithm, honor it if possible.  */
23834 if (ix86_stringop_alg != no_stringop
23835 && alg_usable_p (ix86_stringop_alg, memset))
23836 return ix86_stringop_alg;
23837 /* rep; movq or rep; movl is the smallest variant. */
23838 else if (!optimize_for_speed)
23840 *noalign = true;
23841 if (!count || (count & 3) || (memset && !zero_memset))
23842 return alg_usable_p (rep_prefix_1_byte, memset)
23843 ? rep_prefix_1_byte : loop_1_byte;
23844 else
23845 return alg_usable_p (rep_prefix_4_byte, memset)
23846 ? rep_prefix_4_byte : loop;
23848 /* Very tiny blocks are best handled via the loop; REP is expensive to
23849 set up.  */
23850 else if (expected_size != -1 && expected_size < 4)
23851 return loop_1_byte;
23852 else if (expected_size != -1)
23854 enum stringop_alg alg = libcall;
23855 bool alg_noalign = false;
23856 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23858 /* We get here if the algorithms that were not libcall-based
23859 were rep-prefix based and we are unable to use rep prefixes
23860 based on global register usage. Break out of the loop and
23861 use the heuristic below. */
23862 if (algs->size[i].max == 0)
23863 break;
23864 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23866 enum stringop_alg candidate = algs->size[i].alg;
23868 if (candidate != libcall && alg_usable_p (candidate, memset))
23870 alg = candidate;
23871 alg_noalign = algs->size[i].noalign;
23873 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23874 last non-libcall inline algorithm. */
23875 if (TARGET_INLINE_ALL_STRINGOPS)
23877 /* When the current size is best to be copied by a libcall,
23878 but we are still forced to inline, run the heuristic below
23879 that will pick code for medium sized blocks. */
23880 if (alg != libcall)
23882 *noalign = alg_noalign;
23883 return alg;
23885 break;
23887 else if (alg_usable_p (candidate, memset))
23889 *noalign = algs->size[i].noalign;
23890 return candidate;
23895 /* When asked to inline the call anyway, try to pick a meaningful choice.
23896 We look for the maximal size of block that is faster to copy by hand and
23897 take blocks of at most that size, guessing that the average size will
23898 be roughly half of the block.
23900 If this turns out to be bad, we might simply specify the preferred
23901 choice in ix86_costs. */
23902 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23903 && (algs->unknown_size == libcall
23904 || !alg_usable_p (algs->unknown_size, memset)))
23906 enum stringop_alg alg;
23908 /* If there aren't any usable algorithms, then recursing on
23909 smaller sizes isn't going to find anything. Just return the
23910 simple byte-at-a-time copy loop. */
23911 if (!any_alg_usable_p)
23913 /* Pick something reasonable. */
23914 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23915 *dynamic_check = 128;
23916 return loop_1_byte;
23918 if (max == -1)
23919 max = 4096;
23920 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23921 zero_memset, dynamic_check, noalign);
23922 gcc_assert (*dynamic_check == -1);
23923 gcc_assert (alg != libcall);
23924 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23925 *dynamic_check = max;
23926 return alg;
23928 return (alg_usable_p (algs->unknown_size, memset)
23929 ? algs->unknown_size : libcall);
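/* As an example of the size-optimized path above: a memcpy of a constant
   64 bytes picks rep_prefix_4_byte (the count is a multiple of 4), while a
   copy whose length is not known at compile time falls back to
   rep_prefix_1_byte, assuming the rep prefixes are usable and
   -mstringop-strategy does not override the choice.  */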
23932 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23933 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23934 static int
23935 decide_alignment (int align,
23936 enum stringop_alg alg,
23937 int expected_size,
23938 enum machine_mode move_mode)
23940 int desired_align = 0;
23942 gcc_assert (alg != no_stringop);
23944 if (alg == libcall)
23945 return 0;
23946 if (move_mode == VOIDmode)
23947 return 0;
23949 desired_align = GET_MODE_SIZE (move_mode);
23950 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
23951 copying a whole cache line at once.  */
23952 if (TARGET_PENTIUMPRO
23953 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23954 desired_align = 8;
23956 if (optimize_size)
23957 desired_align = 1;
23958 if (desired_align < align)
23959 desired_align = align;
23960 if (expected_size != -1 && expected_size < 4)
23961 desired_align = align;
23963 return desired_align;
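/* For instance, a vector_loop using V16QImode asks for a 16-byte aligned
   destination (assuming we are not optimizing for size, ALIGN <= 16, and
   the expected size is not known to be tiny), while rep_prefix_4_byte asks
   for 4, bumped to 8 when tuning for PentiumPro.  */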
23967 /* Helper function for memset.  For a QImode value 0xXY produce
23968 0xXYXYXYXY of the width specified by MODE.  This is essentially
23969 a multiplication by 0x01010101, but we can do slightly better than
23970 synth_mult by unwinding the sequence by hand on CPUs with
23971 slow multiply. */
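/* For example, promoting the QImode constant 0x41: the constant path below
   computes 0x41 -> 0x4141 -> 0x41414141, and for DImode one more step
   yields 0x4141414141414141.  */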
23972 static rtx
23973 promote_duplicated_reg (enum machine_mode mode, rtx val)
23975 enum machine_mode valmode = GET_MODE (val);
23976 rtx tmp;
23977 int nops = mode == DImode ? 3 : 2;
23979 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23980 if (val == const0_rtx)
23981 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23982 if (CONST_INT_P (val))
23984 HOST_WIDE_INT v = INTVAL (val) & 255;
23986 v |= v << 8;
23987 v |= v << 16;
23988 if (mode == DImode)
23989 v |= (v << 16) << 16;
23990 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23993 if (valmode == VOIDmode)
23994 valmode = QImode;
23995 if (valmode != QImode)
23996 val = gen_lowpart (QImode, val);
23997 if (mode == QImode)
23998 return val;
23999 if (!TARGET_PARTIAL_REG_STALL)
24000 nops--;
24001 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24002 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24003 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24004 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24006 rtx reg = convert_modes (mode, QImode, val, true);
24007 tmp = promote_duplicated_reg (mode, const1_rtx);
24008 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24009 OPTAB_DIRECT);
24011 else
24013 rtx reg = convert_modes (mode, QImode, val, true);
24015 if (!TARGET_PARTIAL_REG_STALL)
24016 if (mode == SImode)
24017 emit_insn (gen_movsi_insv_1 (reg, reg));
24018 else
24019 emit_insn (gen_movdi_insv_1 (reg, reg));
24020 else
24022 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24023 NULL, 1, OPTAB_DIRECT);
24024 reg =
24025 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24027 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24028 NULL, 1, OPTAB_DIRECT);
24029 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24030 if (mode == SImode)
24031 return reg;
24032 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24033 NULL, 1, OPTAB_DIRECT);
24034 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24035 return reg;
24039 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24040 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24041 alignment from ALIGN to DESIRED_ALIGN. */
24042 static rtx
24043 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24044 int align)
24046 rtx promoted_val;
24048 if (TARGET_64BIT
24049 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24050 promoted_val = promote_duplicated_reg (DImode, val);
24051 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24052 promoted_val = promote_duplicated_reg (SImode, val);
24053 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24054 promoted_val = promote_duplicated_reg (HImode, val);
24055 else
24056 promoted_val = val;
24058 return promoted_val;
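/* E.g. on a 64-bit target a main loop moving 8-byte chunks gets a DImode
   broadcast of VAL, while a 2-byte chunk size with no extra alignment
   requirement only needs an HImode one.  */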
24061 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24062 operations when profitable. The code depends upon architecture, block size
24063 and alignment, but always has one of the following overall structures:
24065 Aligned move sequence:
24067 1) Prologue guard: Conditional that jumps up to epilogues for small
24068 blocks that can be handled by the epilogue alone.  This is faster
24069 but also needed for correctness, since the prologue assumes the block
24070 is larger than the desired alignment.
24072 Optional dynamic check for size and libcall for large
24073 blocks is emitted here too, with -minline-stringops-dynamically.
24075 2) Prologue: copy first few bytes in order to get destination
24076 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24077 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24078 copied. We emit either a jump tree on power of two sized
24079 blocks, or a byte loop.
24081 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24082 with specified algorithm.
24084 4) Epilogue: code copying tail of the block that is too small to be
24085 handled by main body (or up to size guarded by prologue guard).
24087 Misaligned move sequence
24089 1) misaligned move prologue/epilogue containing:
24090 a) Prologue handling small memory blocks and jumping to done_label
24091 (skipped if blocks are known to be large enough)
24092 b) A single, possibly misaligned, move copying the first DESIRED_ALIGN-ALIGN
24093 bytes if alignment is needed
24094 (skipped if alignment is not needed)
24095 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24097 2) Zero size guard dispatching to done_label, if needed
24099 3) dispatch to library call, if needed,
24101 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24102 with specified algorithm. */
24103 bool
24104 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24105 rtx align_exp, rtx expected_align_exp,
24106 rtx expected_size_exp, rtx min_size_exp,
24107 rtx max_size_exp, rtx probable_max_size_exp,
24108 bool issetmem)
24110 rtx destreg;
24111 rtx srcreg = NULL;
24112 rtx label = NULL;
24113 rtx tmp;
24114 rtx jump_around_label = NULL;
24115 HOST_WIDE_INT align = 1;
24116 unsigned HOST_WIDE_INT count = 0;
24117 HOST_WIDE_INT expected_size = -1;
24118 int size_needed = 0, epilogue_size_needed;
24119 int desired_align = 0, align_bytes = 0;
24120 enum stringop_alg alg;
24121 rtx promoted_val = NULL;
24122 rtx vec_promoted_val = NULL;
24123 bool force_loopy_epilogue = false;
24124 int dynamic_check;
24125 bool need_zero_guard = false;
24126 bool noalign;
24127 enum machine_mode move_mode = VOIDmode;
24128 int unroll_factor = 1;
24129 /* TODO: Once value ranges are available, fill in proper data. */
24130 unsigned HOST_WIDE_INT min_size = 0;
24131 unsigned HOST_WIDE_INT max_size = -1;
24132 unsigned HOST_WIDE_INT probable_max_size = -1;
24133 bool misaligned_prologue_used = false;
24135 if (CONST_INT_P (align_exp))
24136 align = INTVAL (align_exp);
24137 /* i386 can do misaligned access at reasonably increased cost.  */
24138 if (CONST_INT_P (expected_align_exp)
24139 && INTVAL (expected_align_exp) > align)
24140 align = INTVAL (expected_align_exp);
24141 /* ALIGN is the minimum of destination and source alignment, but we care here
24142 just about destination alignment. */
24143 else if (!issetmem
24144 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24145 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24147 if (CONST_INT_P (count_exp))
24148 min_size = max_size = probable_max_size = count = expected_size
24149 = INTVAL (count_exp);
24150 else
24152 if (min_size_exp)
24153 min_size = INTVAL (min_size_exp);
24154 if (max_size_exp)
24155 max_size = INTVAL (max_size_exp);
24156 if (probable_max_size_exp)
24157 probable_max_size = INTVAL (probable_max_size_exp);
24158 if (CONST_INT_P (expected_size_exp) && count == 0)
24159 expected_size = INTVAL (expected_size_exp);
24162 /* Make sure we don't need to care about overflow later on. */
24163 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24164 return false;
24166 /* Step 0: Decide on preferred algorithm, desired alignment and
24167 size of chunks to be copied by main loop. */
24168 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24169 issetmem,
24170 issetmem && val_exp == const0_rtx,
24171 &dynamic_check, &noalign);
24172 if (alg == libcall)
24173 return false;
24174 gcc_assert (alg != no_stringop);
24176 /* For now the vector version of memset is generated only for memory zeroing, as
24177 creating a promoted vector value is very cheap in this case.  */
24178 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24179 alg = unrolled_loop;
24181 if (!count)
24182 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24183 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24184 if (!issetmem)
24185 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24187 unroll_factor = 1;
24188 move_mode = word_mode;
24189 switch (alg)
24191 case libcall:
24192 case no_stringop:
24193 case last_alg:
24194 gcc_unreachable ();
24195 case loop_1_byte:
24196 need_zero_guard = true;
24197 move_mode = QImode;
24198 break;
24199 case loop:
24200 need_zero_guard = true;
24201 break;
24202 case unrolled_loop:
24203 need_zero_guard = true;
24204 unroll_factor = (TARGET_64BIT ? 4 : 2);
24205 break;
24206 case vector_loop:
24207 need_zero_guard = true;
24208 unroll_factor = 4;
24209 /* Find the widest supported mode. */
24210 move_mode = word_mode;
24211 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24212 != CODE_FOR_nothing)
24213 move_mode = GET_MODE_WIDER_MODE (move_mode);
24215 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24216 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24217 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24219 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24220 move_mode = mode_for_vector (word_mode, nunits);
24221 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24222 move_mode = word_mode;
24224 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24225 break;
24226 case rep_prefix_8_byte:
24227 move_mode = DImode;
24228 break;
24229 case rep_prefix_4_byte:
24230 move_mode = SImode;
24231 break;
24232 case rep_prefix_1_byte:
24233 move_mode = QImode;
24234 break;
24236 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24237 epilogue_size_needed = size_needed;
24239 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24240 if (!TARGET_ALIGN_STRINGOPS || noalign)
24241 align = desired_align;
24243 /* Step 1: Prologue guard. */
24245 /* Alignment code needs count to be in register. */
24246 if (CONST_INT_P (count_exp) && desired_align > align)
24248 if (INTVAL (count_exp) > desired_align
24249 && INTVAL (count_exp) > size_needed)
24251 align_bytes
24252 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24253 if (align_bytes <= 0)
24254 align_bytes = 0;
24255 else
24256 align_bytes = desired_align - align_bytes;
24258 if (align_bytes == 0)
24259 count_exp = force_reg (counter_mode (count_exp), count_exp);
24261 gcc_assert (desired_align >= 1 && align >= 1);
24263 /* Misaligned move sequences handle both prologue and epilogue at once.
24264 Default code generation results in smaller code for large alignments
24265 and also avoids redundant work when sizes are known precisely.  */
24266 misaligned_prologue_used
24267 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24268 && MAX (desired_align, epilogue_size_needed) <= 32
24269 && desired_align <= epilogue_size_needed
24270 && ((desired_align > align && !align_bytes)
24271 || (!count && epilogue_size_needed > 1)));
24273 /* Do the cheap promotion to allow better CSE across the
24274 main loop and epilogue (i.e. one load of the big constant in
24275 front of all the code).
24276 For now the misaligned move sequences do not have a fast path
24277 without broadcasting.  */
24278 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24280 if (alg == vector_loop)
24282 gcc_assert (val_exp == const0_rtx);
24283 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24284 promoted_val = promote_duplicated_reg_to_size (val_exp,
24285 GET_MODE_SIZE (word_mode),
24286 desired_align, align);
24288 else
24290 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24291 desired_align, align);
24294 /* Misaligned move sequences handle both prologues and epilogues at once.
24295 Default code generation results in smaller code for large alignments and
24296 also avoids redundant work when sizes are known precisely.  */
24297 if (misaligned_prologue_used)
24299 /* The misaligned move prologue handles small blocks by itself.  */
24300 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24301 (dst, src, &destreg, &srcreg,
24302 move_mode, promoted_val, vec_promoted_val,
24303 &count_exp,
24304 &jump_around_label,
24305 desired_align < align
24306 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24307 desired_align, align, &min_size, dynamic_check, issetmem);
24308 if (!issetmem)
24309 src = change_address (src, BLKmode, srcreg);
24310 dst = change_address (dst, BLKmode, destreg);
24311 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24312 epilogue_size_needed = 0;
24313 if (need_zero_guard && !min_size)
24315 /* It is possible that we copied enough so the main loop will not
24316 execute. */
24317 gcc_assert (size_needed > 1);
24318 if (jump_around_label == NULL_RTX)
24319 jump_around_label = gen_label_rtx ();
24320 emit_cmp_and_jump_insns (count_exp,
24321 GEN_INT (size_needed),
24322 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24323 if (expected_size == -1
24324 || expected_size < (desired_align - align) / 2 + size_needed)
24325 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24326 else
24327 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24330 /* Ensure that alignment prologue won't copy past end of block. */
24331 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24333 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24334 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24335 Make sure it is power of 2. */
24336 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24338 /* To improve performance of small blocks, we jump around the VAL
24339 promoting code.  This means that if the promoted VAL is not constant,
24340 we might not use it in the epilogue and have to use the byte
24341 loop variant.  */
24342 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24343 force_loopy_epilogue = true;
24344 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24345 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24347 /* If main algorithm works on QImode, no epilogue is needed.
24348 For small sizes just don't align anything. */
24349 if (size_needed == 1)
24350 desired_align = align;
24351 else
24352 goto epilogue;
24354 else if (!count
24355 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24357 label = gen_label_rtx ();
24358 emit_cmp_and_jump_insns (count_exp,
24359 GEN_INT (epilogue_size_needed),
24360 LTU, 0, counter_mode (count_exp), 1, label);
24361 if (expected_size == -1 || expected_size < epilogue_size_needed)
24362 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24363 else
24364 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24368 /* Emit code to decide at runtime whether a library call or the inline
24369 expansion should be used.  */
24370 if (dynamic_check != -1)
24372 if (!issetmem && CONST_INT_P (count_exp))
24374 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24376 emit_block_move_via_libcall (dst, src, count_exp, false);
24377 count_exp = const0_rtx;
24378 goto epilogue;
24381 else
24383 rtx hot_label = gen_label_rtx ();
24384 if (jump_around_label == NULL_RTX)
24385 jump_around_label = gen_label_rtx ();
24386 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24387 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24388 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24389 if (issetmem)
24390 set_storage_via_libcall (dst, count_exp, val_exp, false);
24391 else
24392 emit_block_move_via_libcall (dst, src, count_exp, false);
24393 emit_jump (jump_around_label);
24394 emit_label (hot_label);
24398 /* Step 2: Alignment prologue. */
24399 /* Do the expensive promotion once we branched off the small blocks. */
24400 if (issetmem && !promoted_val)
24401 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24402 desired_align, align);
24404 if (desired_align > align && !misaligned_prologue_used)
24406 if (align_bytes == 0)
24408 /* Except for the first move in the prologue, we no longer know
24409 the constant offset in the aliasing info.  It doesn't seem worth
24410 the pain to maintain it for the first move, so throw away
24411 the info early. */
24412 dst = change_address (dst, BLKmode, destreg);
24413 if (!issetmem)
24414 src = change_address (src, BLKmode, srcreg);
24415 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24416 promoted_val, vec_promoted_val,
24417 count_exp, align, desired_align,
24418 issetmem);
24419 /* At most desired_align - align bytes are copied. */
24420 if (min_size < (unsigned)(desired_align - align))
24421 min_size = 0;
24422 else
24423 min_size -= desired_align - align;
24425 else
24427 /* If we know how many bytes need to be stored before dst is
24428 sufficiently aligned, maintain aliasing info accurately. */
24429 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24430 srcreg,
24431 promoted_val,
24432 vec_promoted_val,
24433 desired_align,
24434 align_bytes,
24435 issetmem);
24437 count_exp = plus_constant (counter_mode (count_exp),
24438 count_exp, -align_bytes);
24439 count -= align_bytes;
24440 min_size -= align_bytes;
24441 max_size -= align_bytes;
24443 if (need_zero_guard
24444 && !min_size
24445 && (count < (unsigned HOST_WIDE_INT) size_needed
24446 || (align_bytes == 0
24447 && count < ((unsigned HOST_WIDE_INT) size_needed
24448 + desired_align - align))))
24450 /* It is possible that we copied enough so the main loop will not
24451 execute. */
24452 gcc_assert (size_needed > 1);
24453 if (label == NULL_RTX)
24454 label = gen_label_rtx ();
24455 emit_cmp_and_jump_insns (count_exp,
24456 GEN_INT (size_needed),
24457 LTU, 0, counter_mode (count_exp), 1, label);
24458 if (expected_size == -1
24459 || expected_size < (desired_align - align) / 2 + size_needed)
24460 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24461 else
24462 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24465 if (label && size_needed == 1)
24467 emit_label (label);
24468 LABEL_NUSES (label) = 1;
24469 label = NULL;
24470 epilogue_size_needed = 1;
24471 if (issetmem)
24472 promoted_val = val_exp;
24474 else if (label == NULL_RTX && !misaligned_prologue_used)
24475 epilogue_size_needed = size_needed;
24477 /* Step 3: Main loop. */
24479 switch (alg)
24481 case libcall:
24482 case no_stringop:
24483 case last_alg:
24484 gcc_unreachable ();
24485 case loop_1_byte:
24486 case loop:
24487 case unrolled_loop:
24488 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24489 count_exp, move_mode, unroll_factor,
24490 expected_size, issetmem);
24491 break;
24492 case vector_loop:
24493 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24494 vec_promoted_val, count_exp, move_mode,
24495 unroll_factor, expected_size, issetmem);
24496 break;
24497 case rep_prefix_8_byte:
24498 case rep_prefix_4_byte:
24499 case rep_prefix_1_byte:
24500 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24501 val_exp, count_exp, move_mode, issetmem);
24502 break;
24504 /* Adjust properly the offset of src and dest memory for aliasing. */
24505 if (CONST_INT_P (count_exp))
24507 if (!issetmem)
24508 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24509 (count / size_needed) * size_needed);
24510 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24511 (count / size_needed) * size_needed);
24513 else
24515 if (!issetmem)
24516 src = change_address (src, BLKmode, srcreg);
24517 dst = change_address (dst, BLKmode, destreg);
24520 /* Step 4: Epilogue to copy the remaining bytes. */
24521 epilogue:
24522 if (label)
24524 /* When the main loop is done, COUNT_EXP might hold original count,
24525 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24526 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24527 bytes. Compensate if needed. */
24529 if (size_needed < epilogue_size_needed)
24531 tmp =
24532 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24533 GEN_INT (size_needed - 1), count_exp, 1,
24534 OPTAB_DIRECT);
24535 if (tmp != count_exp)
24536 emit_move_insn (count_exp, tmp);
24538 emit_label (label);
24539 LABEL_NUSES (label) = 1;
24542 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24544 if (force_loopy_epilogue)
24545 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24546 epilogue_size_needed);
24547 else
24549 if (issetmem)
24550 expand_setmem_epilogue (dst, destreg, promoted_val,
24551 vec_promoted_val, count_exp,
24552 epilogue_size_needed);
24553 else
24554 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24555 epilogue_size_needed);
24558 if (jump_around_label)
24559 emit_label (jump_around_label);
24560 return true;
24564 /* Expand the appropriate insns for doing strlen if not just doing
24565 repnz; scasb
24567 out = result, initialized with the start address
24568 align_rtx = alignment of the address.
24569 scratch = scratch register, initialized with the start address when
24570 not aligned, otherwise undefined
24572 This is just the body. It needs the initializations mentioned above and
24573 some address computing at the end. These things are done in i386.md. */
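 /* Rough outline of the expansion (for orientation only):
    1. Compare OUT's low bits against the known alignment and branch so
       that at most three leading bytes are tested one at a time until
       OUT is 4-byte aligned.
    2. Loop loading 4 bytes at a time and testing them for a zero byte
       with the bit trick emitted below.
    3. Once a zero byte is known to be in the loaded word, step OUT
       back to its exact position.  */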
24575 static void
24576 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24578 int align;
24579 rtx tmp;
24580 rtx align_2_label = NULL_RTX;
24581 rtx align_3_label = NULL_RTX;
24582 rtx align_4_label = gen_label_rtx ();
24583 rtx end_0_label = gen_label_rtx ();
24584 rtx mem;
24585 rtx tmpreg = gen_reg_rtx (SImode);
24586 rtx scratch = gen_reg_rtx (SImode);
24587 rtx cmp;
24589 align = 0;
24590 if (CONST_INT_P (align_rtx))
24591 align = INTVAL (align_rtx);
24593 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24595 /* Is there a known alignment and is it less than 4? */
24596 if (align < 4)
24598 rtx scratch1 = gen_reg_rtx (Pmode);
24599 emit_move_insn (scratch1, out);
24600 /* Is there a known alignment and is it not 2? */
24601 if (align != 2)
24603 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24604 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24606 /* Leave just the 3 lower bits. */
24607 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24608 NULL_RTX, 0, OPTAB_WIDEN);
24610 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24611 Pmode, 1, align_4_label);
24612 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24613 Pmode, 1, align_2_label);
24614 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24615 Pmode, 1, align_3_label);
24617 else
24619 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24620 check whether it is aligned to a 4-byte boundary. */
24622 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24623 NULL_RTX, 0, OPTAB_WIDEN);
24625 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24626 Pmode, 1, align_4_label);
24629 mem = change_address (src, QImode, out);
24631 /* Now compare the bytes. */
24633 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24634 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24635 QImode, 1, end_0_label);
24637 /* Increment the address. */
24638 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24640 /* Not needed with an alignment of 2 */
24641 if (align != 2)
24643 emit_label (align_2_label);
24645 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24646 end_0_label);
24648 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24650 emit_label (align_3_label);
24653 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24654 end_0_label);
24656 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24659 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24660 align this loop; doing so only bloats the code and does not help
24661 speed. */
24662 emit_label (align_4_label);
24664 mem = change_address (src, SImode, out);
24665 emit_move_insn (scratch, mem);
24666 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24668 /* This formula yields a nonzero result iff one of the bytes is zero.
24669 This saves three branches inside loop and many cycles. */
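 /* In C terms the test below computes (v - 0x01010101) & ~v & 0x80808080,
    which is nonzero exactly when some byte of V is zero. For example,
    with V = 0x11223300: V - 0x01010101 = 0x102131ff, ~V = 0xeeddccff,
    and the two ANDs leave 0x00000080, flagging the zero low byte;
    with V = 0x01010101 the subtraction gives 0 and nothing is flagged.  */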
24671 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24672 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24673 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24674 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24675 gen_int_mode (0x80808080, SImode)));
24676 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24677 align_4_label);
24679 if (TARGET_CMOVE)
24681 rtx reg = gen_reg_rtx (SImode);
24682 rtx reg2 = gen_reg_rtx (Pmode);
24683 emit_move_insn (reg, tmpreg);
24684 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24686 /* If zero is not in the first two bytes, move two bytes forward. */
24687 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24688 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24689 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24690 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24691 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24692 reg,
24693 tmpreg)));
24694 /* Emit lea manually to avoid clobbering of flags. */
24695 emit_insn (gen_rtx_SET (SImode, reg2,
24696 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24698 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24699 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24700 emit_insn (gen_rtx_SET (VOIDmode, out,
24701 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24702 reg2,
24703 out)));
24705 else
24707 rtx end_2_label = gen_label_rtx ();
24708 /* Is zero in the first two bytes? */
24710 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24711 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24712 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24713 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24714 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24715 pc_rtx);
24716 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24717 JUMP_LABEL (tmp) = end_2_label;
24719 /* Not in the first two. Move two bytes forward. */
24720 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24721 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24723 emit_label (end_2_label);
24727 /* Avoid branch in fixing the byte. */
24728 tmpreg = gen_lowpart (QImode, tmpreg);
24729 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24730 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24731 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24732 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24734 emit_label (end_0_label);
24737 /* Expand strlen. */
24739 bool
24740 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24742 rtx addr, scratch1, scratch2, scratch3, scratch4;
24744 /* The generic case of the strlen expander is long. Avoid expanding
24745 it unless TARGET_INLINE_ALL_STRINGOPS. */
24747 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24748 && !TARGET_INLINE_ALL_STRINGOPS
24749 && !optimize_insn_for_size_p ()
24750 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24751 return false;
24753 addr = force_reg (Pmode, XEXP (src, 0));
24754 scratch1 = gen_reg_rtx (Pmode);
24756 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24757 && !optimize_insn_for_size_p ())
24759 /* Well, it seems that some optimizer does not combine a call like
24760 foo(strlen(bar), strlen(bar));
24761 when the move and the subtraction are done here. It does calculate
24762 the length just once when these instructions are done inside of
24763 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
24764 and one fewer register is live for the lifetime of
24765 output_strlen_unroll(), this is better. */
24767 emit_move_insn (out, addr);
24769 ix86_expand_strlensi_unroll_1 (out, src, align);
24771 /* strlensi_unroll_1 returns the address of the zero at the end of
24772 the string, like memchr(), so compute the length by subtracting
24773 the start address. */
24774 emit_insn (ix86_gen_sub3 (out, out, addr));
24776 else
24778 rtx unspec;
24780 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24781 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24782 return false;
24784 scratch2 = gen_reg_rtx (Pmode);
24785 scratch3 = gen_reg_rtx (Pmode);
24786 scratch4 = force_reg (Pmode, constm1_rtx);
24788 emit_move_insn (scratch3, addr);
24789 eoschar = force_reg (QImode, eoschar);
24791 src = replace_equiv_address_nv (src, scratch3);
24793 /* If .md starts supporting :P, this can be done in .md. */
24794 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24795 scratch4), UNSPEC_SCAS);
24796 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24797 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24798 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24800 return true;
24803 /* For a given symbol (function), construct code to compute the address of
24804 its PLT entry in the large x86-64 PIC model. */
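 /* A sketch of what gets emitted (register choice is up to the register
    allocator):
	movabs $symbol@PLTOFF, %tmp
	add    <PIC base reg>, %tmp
    i.e. the PLT entry address is the GOT/PIC base plus the symbol's
    PLTOFF offset.  */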
24805 static rtx
24806 construct_plt_address (rtx symbol)
24808 rtx tmp, unspec;
24810 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24811 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24812 gcc_assert (Pmode == DImode);
24814 tmp = gen_reg_rtx (Pmode);
24815 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24817 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24818 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24819 return tmp;
24823 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24824 rtx callarg2,
24825 rtx pop, bool sibcall)
24827 unsigned int const cregs_size
24828 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24829 rtx vec[3 + cregs_size];
24830 rtx use = NULL, call;
24831 unsigned int vec_len = 0;
24833 if (pop == const0_rtx)
24834 pop = NULL;
24835 gcc_assert (!TARGET_64BIT || !pop);
24837 if (TARGET_MACHO && !TARGET_64BIT)
24839 #if TARGET_MACHO
24840 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24841 fnaddr = machopic_indirect_call_target (fnaddr);
24842 #endif
24844 else
24846 /* Static functions and indirect calls don't need the pic register. */
24847 if (flag_pic
24848 && (!TARGET_64BIT
24849 || (ix86_cmodel == CM_LARGE_PIC
24850 && DEFAULT_ABI != MS_ABI))
24851 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24852 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24853 use_reg (&use, pic_offset_table_rtx);
24856 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24858 rtx al = gen_rtx_REG (QImode, AX_REG);
24859 emit_move_insn (al, callarg2);
24860 use_reg (&use, al);
24863 if (ix86_cmodel == CM_LARGE_PIC
24864 && !TARGET_PECOFF
24865 && MEM_P (fnaddr)
24866 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24867 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24868 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24869 else if (sibcall
24870 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24871 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24873 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24874 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24877 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24878 if (retval)
24879 call = gen_rtx_SET (VOIDmode, retval, call);
24880 vec[vec_len++] = call;
24882 if (pop)
24884 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24885 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24886 vec[vec_len++] = pop;
24889 if (TARGET_64BIT_MS_ABI
24890 && (!callarg2 || INTVAL (callarg2) != -2))
24892 unsigned i;
24894 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24895 UNSPEC_MS_TO_SYSV_CALL);
24897 for (i = 0; i < cregs_size; i++)
24899 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24900 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24902 vec[vec_len++]
24903 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24907 if (vec_len > 1)
24908 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24909 call = emit_call_insn (call);
24910 if (use)
24911 CALL_INSN_FUNCTION_USAGE (call) = use;
24913 return call;
24916 /* Output the assembly for a call instruction. */
24918 const char *
24919 ix86_output_call_insn (rtx insn, rtx call_op)
24921 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24922 bool seh_nop_p = false;
24923 const char *xasm;
24925 if (SIBLING_CALL_P (insn))
24927 if (direct_p)
24928 xasm = "jmp\t%P0";
24929 /* SEH epilogue detection requires the indirect branch case
24930 to include REX.W. */
24931 else if (TARGET_SEH)
24932 xasm = "rex.W jmp %A0";
24933 else
24934 xasm = "jmp\t%A0";
24936 output_asm_insn (xasm, &call_op);
24937 return "";
24940 /* SEH unwinding can require an extra nop to be emitted in several
24941 circumstances. Determine if we have one of those. */
24942 if (TARGET_SEH)
24944 rtx i;
24946 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24948 /* If we get to another real insn, we don't need the nop. */
24949 if (INSN_P (i))
24950 break;
24952 /* If we get to the epilogue note, prevent a catch region from
24953 being adjacent to the standard epilogue sequence. If non-call
24954 exceptions are enabled, we'll have done this during epilogue emission. */
24955 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24956 && !flag_non_call_exceptions
24957 && !can_throw_internal (insn))
24959 seh_nop_p = true;
24960 break;
24964 /* If we didn't find a real insn following the call, prevent the
24965 unwinder from looking into the next function. */
24966 if (i == NULL)
24967 seh_nop_p = true;
24970 if (direct_p)
24971 xasm = "call\t%P0";
24972 else
24973 xasm = "call\t%A0";
24975 output_asm_insn (xasm, &call_op);
24977 if (seh_nop_p)
24978 return "nop";
24980 return "";
24983 /* Clear stack slot assignments remembered from previous functions.
24984 This is called from INIT_EXPANDERS once before RTL is emitted for each
24985 function. */
24987 static struct machine_function *
24988 ix86_init_machine_status (void)
24990 struct machine_function *f;
24992 f = ggc_alloc_cleared_machine_function ();
24993 f->use_fast_prologue_epilogue_nregs = -1;
24994 f->call_abi = ix86_abi;
24996 return f;
24999 /* Return a MEM corresponding to a stack slot with mode MODE.
25000 Allocate a new slot if necessary.
25002 The RTL for a function can have several slots available: N is
25003 which slot to use. */
25006 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25008 struct stack_local_entry *s;
25010 gcc_assert (n < MAX_386_STACK_LOCALS);
25012 for (s = ix86_stack_locals; s; s = s->next)
25013 if (s->mode == mode && s->n == n)
25014 return validize_mem (copy_rtx (s->rtl));
25016 s = ggc_alloc_stack_local_entry ();
25017 s->n = n;
25018 s->mode = mode;
25019 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25021 s->next = ix86_stack_locals;
25022 ix86_stack_locals = s;
25023 return validize_mem (s->rtl);
25026 static void
25027 ix86_instantiate_decls (void)
25029 struct stack_local_entry *s;
25031 for (s = ix86_stack_locals; s; s = s->next)
25032 if (s->rtl != NULL_RTX)
25033 instantiate_decl_rtl (s->rtl);
25036 /* Check whether x86 address PARTS is a pc-relative address. */
25038 static bool
25039 rip_relative_addr_p (struct ix86_address *parts)
25041 rtx base, index, disp;
25043 base = parts->base;
25044 index = parts->index;
25045 disp = parts->disp;
25047 if (disp && !base && !index)
25049 if (TARGET_64BIT)
25051 rtx symbol = disp;
25053 if (GET_CODE (disp) == CONST)
25054 symbol = XEXP (disp, 0);
25055 if (GET_CODE (symbol) == PLUS
25056 && CONST_INT_P (XEXP (symbol, 1)))
25057 symbol = XEXP (symbol, 0);
25059 if (GET_CODE (symbol) == LABEL_REF
25060 || (GET_CODE (symbol) == SYMBOL_REF
25061 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25062 || (GET_CODE (symbol) == UNSPEC
25063 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25064 || XINT (symbol, 1) == UNSPEC_PCREL
25065 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25066 return true;
25069 return false;
25072 /* Calculate the length of the memory address in the instruction encoding.
25073 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25074 or other prefixes. We never generate addr32 prefix for LEA insn. */
25077 memory_address_length (rtx addr, bool lea)
25079 struct ix86_address parts;
25080 rtx base, index, disp;
25081 int len;
25082 int ok;
25084 if (GET_CODE (addr) == PRE_DEC
25085 || GET_CODE (addr) == POST_INC
25086 || GET_CODE (addr) == PRE_MODIFY
25087 || GET_CODE (addr) == POST_MODIFY)
25088 return 0;
25090 ok = ix86_decompose_address (addr, &parts);
25091 gcc_assert (ok);
25093 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25095 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25096 if (TARGET_64BIT && !lea
25097 && (SImode_address_operand (addr, VOIDmode)
25098 || (parts.base && GET_MODE (parts.base) == SImode)
25099 || (parts.index && GET_MODE (parts.index) == SImode)))
25100 len++;
25102 base = parts.base;
25103 index = parts.index;
25104 disp = parts.disp;
25106 if (base && GET_CODE (base) == SUBREG)
25107 base = SUBREG_REG (base);
25108 if (index && GET_CODE (index) == SUBREG)
25109 index = SUBREG_REG (index);
25111 gcc_assert (base == NULL_RTX || REG_P (base));
25112 gcc_assert (index == NULL_RTX || REG_P (index));
25114 /* Rule of thumb:
25115 - esp as the base always wants an index,
25116 - ebp as the base always wants a displacement,
25117 - r12 as the base always wants an index,
25118 - r13 as the base always wants a displacement. */
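 /* Encoding background (for illustration): "(%esp)" cannot be encoded
    with the ModRM byte alone because r/m = 100 selects a SIB byte, and
    "(%ebp)" cannot either because mod = 00 with r/m = 101 means a bare
    disp32, so a zero disp8 has to be emitted instead; %r12 and %r13
    share those r/m encodings via REX.B and inherit the same quirks.  */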
25120 /* Register Indirect. */
25121 if (base && !index && !disp)
25123 /* esp (for its index) and ebp (for its displacement) need
25124 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25125 code. */
25126 if (base == arg_pointer_rtx
25127 || base == frame_pointer_rtx
25128 || REGNO (base) == SP_REG
25129 || REGNO (base) == BP_REG
25130 || REGNO (base) == R12_REG
25131 || REGNO (base) == R13_REG)
25132 len++;
25135 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25136 is not disp32 but disp32(%rip), so for a plain disp32 a
25137 SIB byte is needed, unless print_operand_address
25138 optimizes it into disp32(%rip) or (%rip) is implied
25139 by UNSPEC. */
25140 else if (disp && !base && !index)
25142 len += 4;
25143 if (rip_relative_addr_p (&parts))
25144 len++;
25146 else
25148 /* Find the length of the displacement constant. */
25149 if (disp)
25151 if (base && satisfies_constraint_K (disp))
25152 len += 1;
25153 else
25154 len += 4;
25156 /* ebp always wants a displacement. Similarly r13. */
25157 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25158 len++;
25160 /* An index requires the two-byte modrm form.... */
25161 if (index
25162 /* ...like esp (or r12), which always wants an index. */
25163 || base == arg_pointer_rtx
25164 || base == frame_pointer_rtx
25165 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25166 len++;
25169 return len;
25172 /* Compute the default value for the "length_immediate" attribute. When
25173 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
25175 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25177 int len = 0;
25178 int i;
25179 extract_insn_cached (insn);
25180 for (i = recog_data.n_operands - 1; i >= 0; --i)
25181 if (CONSTANT_P (recog_data.operand[i]))
25183 enum attr_mode mode = get_attr_mode (insn);
25185 gcc_assert (!len);
25186 if (shortform && CONST_INT_P (recog_data.operand[i]))
25188 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25189 switch (mode)
25191 case MODE_QI:
25192 len = 1;
25193 continue;
25194 case MODE_HI:
25195 ival = trunc_int_for_mode (ival, HImode);
25196 break;
25197 case MODE_SI:
25198 ival = trunc_int_for_mode (ival, SImode);
25199 break;
25200 default:
25201 break;
25203 if (IN_RANGE (ival, -128, 127))
25205 len = 1;
25206 continue;
25209 switch (mode)
25211 case MODE_QI:
25212 len = 1;
25213 break;
25214 case MODE_HI:
25215 len = 2;
25216 break;
25217 case MODE_SI:
25218 len = 4;
25219 break;
25220 /* Immediates for DImode instructions are encoded
25221 as 32-bit sign-extended values. */
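 /* E.g. "addq $-1, %rax" carries a 4-byte immediate that the CPU
    sign-extends to 64 bits, which is why MODE_DI yields 4 here; the
    64-bit mov-immediate (movabs) is the one exception in the ISA.  */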
25222 case MODE_DI:
25223 len = 4;
25224 break;
25225 default:
25226 fatal_insn ("unknown insn mode", insn);
25229 return len;
25232 /* Compute default value for "length_address" attribute. */
25234 ix86_attr_length_address_default (rtx insn)
25236 int i;
25238 if (get_attr_type (insn) == TYPE_LEA)
25240 rtx set = PATTERN (insn), addr;
25242 if (GET_CODE (set) == PARALLEL)
25243 set = XVECEXP (set, 0, 0);
25245 gcc_assert (GET_CODE (set) == SET);
25247 addr = SET_SRC (set);
25249 return memory_address_length (addr, true);
25252 extract_insn_cached (insn);
25253 for (i = recog_data.n_operands - 1; i >= 0; --i)
25254 if (MEM_P (recog_data.operand[i]))
25256 constrain_operands_cached (reload_completed);
25257 if (which_alternative != -1)
25259 const char *constraints = recog_data.constraints[i];
25260 int alt = which_alternative;
25262 while (*constraints == '=' || *constraints == '+')
25263 constraints++;
25264 while (alt-- > 0)
25265 while (*constraints++ != ',')
25267 /* Skip ignored operands. */
25268 if (*constraints == 'X')
25269 continue;
25271 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25273 return 0;
25276 /* Compute the default value for the "length_vex" attribute. It includes
25277 the 2- or 3-byte VEX prefix and 1 opcode byte. */
25280 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25282 int i;
25284 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
25285 bit requires the 3-byte VEX prefix. */
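 /* Encoding background (for illustration): the 2-byte form (C5 ..)
    implies the 0F opcode map and cannot express VEX.W, VEX.X or VEX.B,
    so insns from the 0F38/0F3A maps, insns needing W = 1, and insns
    whose operands need the extended index/base bits must use the
    3-byte form (C4 .. ..).  */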
25286 if (!has_0f_opcode || has_vex_w)
25287 return 3 + 1;
25289 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25290 if (!TARGET_64BIT)
25291 return 2 + 1;
25293 extract_insn_cached (insn);
25295 for (i = recog_data.n_operands - 1; i >= 0; --i)
25296 if (REG_P (recog_data.operand[i]))
25298 /* REX.W bit uses 3 byte VEX prefix. */
25299 if (GET_MODE (recog_data.operand[i]) == DImode
25300 && GENERAL_REG_P (recog_data.operand[i]))
25301 return 3 + 1;
25303 else
25305 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25306 if (MEM_P (recog_data.operand[i])
25307 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25308 return 3 + 1;
25311 return 2 + 1;
25314 /* Return the maximum number of instructions a cpu can issue. */
25316 static int
25317 ix86_issue_rate (void)
25319 switch (ix86_tune)
25321 case PROCESSOR_PENTIUM:
25322 case PROCESSOR_BONNELL:
25323 case PROCESSOR_SILVERMONT:
25324 case PROCESSOR_INTEL:
25325 case PROCESSOR_K6:
25326 case PROCESSOR_BTVER2:
25327 case PROCESSOR_PENTIUM4:
25328 case PROCESSOR_NOCONA:
25329 return 2;
25331 case PROCESSOR_PENTIUMPRO:
25332 case PROCESSOR_ATHLON:
25333 case PROCESSOR_K8:
25334 case PROCESSOR_AMDFAM10:
25335 case PROCESSOR_GENERIC:
25336 case PROCESSOR_BTVER1:
25337 return 3;
25339 case PROCESSOR_BDVER1:
25340 case PROCESSOR_BDVER2:
25341 case PROCESSOR_BDVER3:
25342 case PROCESSOR_BDVER4:
25343 case PROCESSOR_CORE2:
25344 case PROCESSOR_NEHALEM:
25345 case PROCESSOR_SANDYBRIDGE:
25346 case PROCESSOR_HASWELL:
25347 return 4;
25349 default:
25350 return 1;
25354 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25355 set by DEP_INSN and nothing else set by DEP_INSN. */
25357 static bool
25358 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25360 rtx set, set2;
25362 /* Simplify the test for uninteresting insns. */
25363 if (insn_type != TYPE_SETCC
25364 && insn_type != TYPE_ICMOV
25365 && insn_type != TYPE_FCMOV
25366 && insn_type != TYPE_IBR)
25367 return false;
25369 if ((set = single_set (dep_insn)) != 0)
25371 set = SET_DEST (set);
25372 set2 = NULL_RTX;
25374 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25375 && XVECLEN (PATTERN (dep_insn), 0) == 2
25376 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25377 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25379 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25380 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25382 else
25383 return false;
25385 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25386 return false;
25388 /* This test is true if the dependent insn reads the flags but
25389 not any other potentially set register. */
25390 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25391 return false;
25393 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25394 return false;
25396 return true;
25399 /* Return true iff USE_INSN has a memory address with operands set by
25400 SET_INSN. */
25402 bool
25403 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25405 int i;
25406 extract_insn_cached (use_insn);
25407 for (i = recog_data.n_operands - 1; i >= 0; --i)
25408 if (MEM_P (recog_data.operand[i]))
25410 rtx addr = XEXP (recog_data.operand[i], 0);
25411 return modified_in_p (addr, set_insn) != 0;
25413 return false;
25416 /* Helper function for exact_store_load_dependency.
25417 Return true if addr is found in insn. */
25418 static bool
25419 exact_dependency_1 (rtx addr, rtx insn)
25421 enum rtx_code code;
25422 const char *format_ptr;
25423 int i, j;
25425 code = GET_CODE (insn);
25426 switch (code)
25428 case MEM:
25429 if (rtx_equal_p (addr, insn))
25430 return true;
25431 break;
25432 case REG:
25433 CASE_CONST_ANY:
25434 case SYMBOL_REF:
25435 case CODE_LABEL:
25436 case PC:
25437 case CC0:
25438 case EXPR_LIST:
25439 return false;
25440 default:
25441 break;
25444 format_ptr = GET_RTX_FORMAT (code);
25445 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25447 switch (*format_ptr++)
25449 case 'e':
25450 if (exact_dependency_1 (addr, XEXP (insn, i)))
25451 return true;
25452 break;
25453 case 'E':
25454 for (j = 0; j < XVECLEN (insn, i); j++)
25455 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25456 return true;
25457 break;
25460 return false;
25463 /* Return true if there exists exact dependency for store & load, i.e.
25464 the same memory address is used in them. */
25465 static bool
25466 exact_store_load_dependency (rtx store, rtx load)
25468 rtx set1, set2;
25470 set1 = single_set (store);
25471 if (!set1)
25472 return false;
25473 if (!MEM_P (SET_DEST (set1)))
25474 return false;
25475 set2 = single_set (load);
25476 if (!set2)
25477 return false;
25478 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25479 return true;
25480 return false;
25483 static int
25484 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25486 enum attr_type insn_type, dep_insn_type;
25487 enum attr_memory memory;
25488 rtx set, set2;
25489 int dep_insn_code_number;
25491 /* Anti and output dependencies have zero cost on all CPUs. */
25492 if (REG_NOTE_KIND (link) != 0)
25493 return 0;
25495 dep_insn_code_number = recog_memoized (dep_insn);
25497 /* If we can't recognize the insns, we can't really do anything. */
25498 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25499 return cost;
25501 insn_type = get_attr_type (insn);
25502 dep_insn_type = get_attr_type (dep_insn);
25504 switch (ix86_tune)
25506 case PROCESSOR_PENTIUM:
25507 /* Address Generation Interlock adds a cycle of latency. */
25508 if (insn_type == TYPE_LEA)
25510 rtx addr = PATTERN (insn);
25512 if (GET_CODE (addr) == PARALLEL)
25513 addr = XVECEXP (addr, 0, 0);
25515 gcc_assert (GET_CODE (addr) == SET);
25517 addr = SET_SRC (addr);
25518 if (modified_in_p (addr, dep_insn))
25519 cost += 1;
25521 else if (ix86_agi_dependent (dep_insn, insn))
25522 cost += 1;
25524 /* ??? Compares pair with jump/setcc. */
25525 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25526 cost = 0;
25528 /* Floating point stores require value to be ready one cycle earlier. */
25529 if (insn_type == TYPE_FMOV
25530 && get_attr_memory (insn) == MEMORY_STORE
25531 && !ix86_agi_dependent (dep_insn, insn))
25532 cost += 1;
25533 break;
25535 case PROCESSOR_PENTIUMPRO:
25536 /* INT->FP conversion is expensive. */
25537 if (get_attr_fp_int_src (dep_insn))
25538 cost += 5;
25540 /* There is one cycle extra latency between an FP op and a store. */
25541 if (insn_type == TYPE_FMOV
25542 && (set = single_set (dep_insn)) != NULL_RTX
25543 && (set2 = single_set (insn)) != NULL_RTX
25544 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25545 && MEM_P (SET_DEST (set2)))
25546 cost += 1;
25548 memory = get_attr_memory (insn);
25550 /* Model the reorder buffer's ability to hide the latency of a load by
25551 executing it in parallel with the previous instruction when the
25552 previous instruction is not needed to compute the address. */
25553 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25554 && !ix86_agi_dependent (dep_insn, insn))
25556 /* Claim that moves take one cycle, as the core can issue one load
25557 at a time and the next load can start a cycle later. */
25558 if (dep_insn_type == TYPE_IMOV
25559 || dep_insn_type == TYPE_FMOV)
25560 cost = 1;
25561 else if (cost > 1)
25562 cost--;
25564 break;
25566 case PROCESSOR_K6:
25567 /* The esp dependency is resolved before
25568 the instruction is really finished. */
25569 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25570 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25571 return 1;
25573 /* INT->FP conversion is expensive. */
25574 if (get_attr_fp_int_src (dep_insn))
25575 cost += 5;
25577 memory = get_attr_memory (insn);
25579 /* Model the reorder buffer's ability to hide the latency of a load by
25580 executing it in parallel with the previous instruction when the
25581 previous instruction is not needed to compute the address. */
25582 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25583 && !ix86_agi_dependent (dep_insn, insn))
25585 /* Claim that moves take one cycle, as the core can issue one load
25586 at a time and the next load can start a cycle later. */
25587 if (dep_insn_type == TYPE_IMOV
25588 || dep_insn_type == TYPE_FMOV)
25589 cost = 1;
25590 else if (cost > 2)
25591 cost -= 2;
25592 else
25593 cost = 1;
25595 break;
25597 case PROCESSOR_AMDFAM10:
25598 case PROCESSOR_BDVER1:
25599 case PROCESSOR_BDVER2:
25600 case PROCESSOR_BDVER3:
25601 case PROCESSOR_BDVER4:
25602 case PROCESSOR_BTVER1:
25603 case PROCESSOR_BTVER2:
25604 case PROCESSOR_GENERIC:
25605 /* The stack engine allows push and pop instructions to execute in parallel. */
25606 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25607 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25608 return 0;
25609 /* FALLTHRU */
25611 case PROCESSOR_ATHLON:
25612 case PROCESSOR_K8:
25613 memory = get_attr_memory (insn);
25615 /* Model the reorder buffer's ability to hide the latency of a load by
25616 executing it in parallel with the previous instruction when the
25617 previous instruction is not needed to compute the address. */
25618 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25619 && !ix86_agi_dependent (dep_insn, insn))
25621 enum attr_unit unit = get_attr_unit (insn);
25622 int loadcost = 3;
25624 /* Because of the difference between the length of the integer and
25625 floating unit pipeline preparation stages, the memory operands
25626 for floating point are cheaper.
25628 ??? For Athlon the difference is most probably 2. */
25629 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25630 loadcost = 3;
25631 else
25632 loadcost = TARGET_ATHLON ? 2 : 0;
25634 if (cost >= loadcost)
25635 cost -= loadcost;
25636 else
25637 cost = 0;
25639 break;
25641 case PROCESSOR_CORE2:
25642 case PROCESSOR_NEHALEM:
25643 case PROCESSOR_SANDYBRIDGE:
25644 case PROCESSOR_HASWELL:
25645 /* The stack engine allows push and pop instructions to execute in parallel. */
25646 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25647 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25648 return 0;
25650 memory = get_attr_memory (insn);
25652 /* Model the reorder buffer's ability to hide the latency of a load by
25653 executing it in parallel with the previous instruction when the
25654 previous instruction is not needed to compute the address. */
25655 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25656 && !ix86_agi_dependent (dep_insn, insn))
25658 if (cost >= 4)
25659 cost -= 4;
25660 else
25661 cost = 0;
25663 break;
25665 case PROCESSOR_SILVERMONT:
25666 case PROCESSOR_INTEL:
25667 if (!reload_completed)
25668 return cost;
25670 /* Increase cost of integer loads. */
25671 memory = get_attr_memory (dep_insn);
25672 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25674 enum attr_unit unit = get_attr_unit (dep_insn);
25675 if (unit == UNIT_INTEGER && cost == 1)
25677 if (memory == MEMORY_LOAD)
25678 cost = 3;
25679 else
25681 /* Increase the cost of ld/st for short int types only
25682 because of the store-forwarding issue. */
25683 rtx set = single_set (dep_insn);
25684 if (set && (GET_MODE (SET_DEST (set)) == QImode
25685 || GET_MODE (SET_DEST (set)) == HImode))
25687 /* Increase the cost of a store/load insn if an exact
25688 dependence exists and it is a load insn. */
25689 enum attr_memory insn_memory = get_attr_memory (insn);
25690 if (insn_memory == MEMORY_LOAD
25691 && exact_store_load_dependency (dep_insn, insn))
25692 cost = 3;
25698 default:
25699 break;
25702 return cost;
25705 /* How many alternative schedules to try. This should be as wide as the
25706 scheduling freedom in the DFA, but no wider. Making this value too
25707 large results in extra work for the scheduler. */
25709 static int
25710 ia32_multipass_dfa_lookahead (void)
25712 switch (ix86_tune)
25714 case PROCESSOR_PENTIUM:
25715 return 2;
25717 case PROCESSOR_PENTIUMPRO:
25718 case PROCESSOR_K6:
25719 return 1;
25721 case PROCESSOR_BDVER1:
25722 case PROCESSOR_BDVER2:
25723 case PROCESSOR_BDVER3:
25724 case PROCESSOR_BDVER4:
25725 /* We use a lookahead value of 4 for BD both before and after reload
25726 scheduling. The plan is to use the value 8 for -O3. */
25727 return 4;
25729 case PROCESSOR_CORE2:
25730 case PROCESSOR_NEHALEM:
25731 case PROCESSOR_SANDYBRIDGE:
25732 case PROCESSOR_HASWELL:
25733 case PROCESSOR_BONNELL:
25734 case PROCESSOR_SILVERMONT:
25735 case PROCESSOR_INTEL:
25736 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25737 as the number of instructions that can be executed in a cycle, i.e.,
25738 issue_rate. I wonder why the tuning for many CPUs does not do this. */
25739 if (reload_completed)
25740 return ix86_issue_rate ();
25741 /* Don't use lookahead for pre-reload schedule to save compile time. */
25742 return 0;
25744 default:
25745 return 0;
25749 /* Return true if target platform supports macro-fusion. */
25751 static bool
25752 ix86_macro_fusion_p ()
25754 return TARGET_FUSE_CMP_AND_BRANCH;
25757 /* Check whether the current microarchitecture supports macro fusion
25758 for the insn pair "CONDGEN + CONDJMP". Refer to the
25759 "Intel Architectures Optimization Reference Manual". */
25761 static bool
25762 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25764 rtx src, dest;
25765 rtx single_set = single_set (condgen);
25766 enum rtx_code ccode;
25767 rtx compare_set = NULL_RTX, test_if, cond;
25768 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25770 if (get_attr_type (condgen) != TYPE_TEST
25771 && get_attr_type (condgen) != TYPE_ICMP
25772 && get_attr_type (condgen) != TYPE_INCDEC
25773 && get_attr_type (condgen) != TYPE_ALU)
25774 return false;
25776 if (single_set == NULL_RTX
25777 && !TARGET_FUSE_ALU_AND_BRANCH)
25778 return false;
25780 if (single_set != NULL_RTX)
25781 compare_set = single_set;
25782 else
25784 int i;
25785 rtx pat = PATTERN (condgen);
25786 for (i = 0; i < XVECLEN (pat, 0); i++)
25787 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25789 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25790 if (GET_CODE (set_src) == COMPARE)
25791 compare_set = XVECEXP (pat, 0, i);
25792 else
25793 alu_set = XVECEXP (pat, 0, i);
25796 if (compare_set == NULL_RTX)
25797 return false;
25798 src = SET_SRC (compare_set);
25799 if (GET_CODE (src) != COMPARE)
25800 return false;
25802 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25803 supported. */
25804 if ((MEM_P (XEXP (src, 0))
25805 && CONST_INT_P (XEXP (src, 1)))
25806 || (MEM_P (XEXP (src, 1))
25807 && CONST_INT_P (XEXP (src, 0))))
25808 return false;
25810 /* No fusion for RIP-relative address. */
25811 if (MEM_P (XEXP (src, 0)))
25812 addr = XEXP (XEXP (src, 0), 0);
25813 else if (MEM_P (XEXP (src, 1)))
25814 addr = XEXP (XEXP (src, 1), 0);
25816 if (addr) {
25817 ix86_address parts;
25818 int ok = ix86_decompose_address (addr, &parts);
25819 gcc_assert (ok);
25821 if (rip_relative_addr_p (&parts))
25822 return false;
25825 test_if = SET_SRC (pc_set (condjmp));
25826 cond = XEXP (test_if, 0);
25827 ccode = GET_CODE (cond);
25828 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25829 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25830 && (ccode == GE
25831 || ccode == GT
25832 || ccode == LE
25833 || ccode == LT))
25834 return false;
25836 /* Return true for TYPE_TEST and TYPE_ICMP. */
25837 if (get_attr_type (condgen) == TYPE_TEST
25838 || get_attr_type (condgen) == TYPE_ICMP)
25839 return true;
25841 /* What follows handles macro-fusion for alu + jmp. */
25842 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25843 return false;
25845 /* No fusion for alu op with memory destination operand. */
25846 dest = SET_DEST (alu_set);
25847 if (MEM_P (dest))
25848 return false;
25850 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25851 supported. */
25852 if (get_attr_type (condgen) == TYPE_INCDEC
25853 && (ccode == GEU
25854 || ccode == GTU
25855 || ccode == LEU
25856 || ccode == LTU))
25857 return false;
25859 return true;
25862 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25863 execution. It is applied if
25864 (1) an IMUL instruction is at the top of the list;
25865 (2) there is exactly one producer of an independent IMUL instruction in
25866 the ready list.
25867 Return the index of the IMUL producer if it was found and -1 otherwise. */
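 /* The idea (illustrative): if the ready list looks like
	{ ..., producer-of-imul2, ..., imul1 }
    and imul1 is about to issue, pulling the producer of the other,
    independent IMUL to the top lets imul2 become ready right behind
    imul1, so the two multiplies overlap in Atom's pipelined IMUL unit.  */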
25868 static int
25869 do_reorder_for_imul (rtx *ready, int n_ready)
25871 rtx insn, set, insn1, insn2;
25872 sd_iterator_def sd_it;
25873 dep_t dep;
25874 int index = -1;
25875 int i;
25877 if (!TARGET_BONNELL)
25878 return index;
25880 /* Check that IMUL instruction is on the top of ready list. */
25881 insn = ready[n_ready - 1];
25882 set = single_set (insn);
25883 if (!set)
25884 return index;
25885 if (!(GET_CODE (SET_SRC (set)) == MULT
25886 && GET_MODE (SET_SRC (set)) == SImode))
25887 return index;
25889 /* Search for producer of independent IMUL instruction. */
25890 for (i = n_ready - 2; i >= 0; i--)
25892 insn = ready[i];
25893 if (!NONDEBUG_INSN_P (insn))
25894 continue;
25895 /* Skip IMUL instruction. */
25896 insn2 = PATTERN (insn);
25897 if (GET_CODE (insn2) == PARALLEL)
25898 insn2 = XVECEXP (insn2, 0, 0);
25899 if (GET_CODE (insn2) == SET
25900 && GET_CODE (SET_SRC (insn2)) == MULT
25901 && GET_MODE (SET_SRC (insn2)) == SImode)
25902 continue;
25904 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25906 rtx con;
25907 con = DEP_CON (dep);
25908 if (!NONDEBUG_INSN_P (con))
25909 continue;
25910 insn1 = PATTERN (con);
25911 if (GET_CODE (insn1) == PARALLEL)
25912 insn1 = XVECEXP (insn1, 0, 0);
25914 if (GET_CODE (insn1) == SET
25915 && GET_CODE (SET_SRC (insn1)) == MULT
25916 && GET_MODE (SET_SRC (insn1)) == SImode)
25918 sd_iterator_def sd_it1;
25919 dep_t dep1;
25920 /* Check if there is no other dependee for IMUL. */
25921 index = i;
25922 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25924 rtx pro;
25925 pro = DEP_PRO (dep1);
25926 if (!NONDEBUG_INSN_P (pro))
25927 continue;
25928 if (pro != insn)
25929 index = -1;
25931 if (index >= 0)
25932 break;
25935 if (index >= 0)
25936 break;
25938 return index;
25941 /* Try to find the best candidate at the top of the ready list if the two
25942 insns have the same priority - a candidate is best if its dependees were
25943 scheduled earlier. Applied for Silvermont only.
25944 Return true if the top 2 insns must be interchanged. */
25945 static bool
25946 swap_top_of_ready_list (rtx *ready, int n_ready)
25948 rtx top = ready[n_ready - 1];
25949 rtx next = ready[n_ready - 2];
25950 rtx set;
25951 sd_iterator_def sd_it;
25952 dep_t dep;
25953 int clock1 = -1;
25954 int clock2 = -1;
25955 #define INSN_TICK(INSN) (HID (INSN)->tick)
25957 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25958 return false;
25960 if (!NONDEBUG_INSN_P (top))
25961 return false;
25962 if (!NONJUMP_INSN_P (top))
25963 return false;
25964 if (!NONDEBUG_INSN_P (next))
25965 return false;
25966 if (!NONJUMP_INSN_P (next))
25967 return false;
25968 set = single_set (top);
25969 if (!set)
25970 return false;
25971 set = single_set (next);
25972 if (!set)
25973 return false;
25975 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25977 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25978 return false;
25979 /* Determine the winner more precisely. */
25980 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25982 rtx pro;
25983 pro = DEP_PRO (dep);
25984 if (!NONDEBUG_INSN_P (pro))
25985 continue;
25986 if (INSN_TICK (pro) > clock1)
25987 clock1 = INSN_TICK (pro);
25989 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25991 rtx pro;
25992 pro = DEP_PRO (dep);
25993 if (!NONDEBUG_INSN_P (pro))
25994 continue;
25995 if (INSN_TICK (pro) > clock2)
25996 clock2 = INSN_TICK (pro);
25999 if (clock1 == clock2)
26001 /* Determine the winner - a load must win. */
26002 enum attr_memory memory1, memory2;
26003 memory1 = get_attr_memory (top);
26004 memory2 = get_attr_memory (next);
26005 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26006 return true;
26008 return (bool) (clock2 < clock1);
26010 return false;
26011 #undef INSN_TICK
26014 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26015 Return the issue rate. */
26016 static int
26017 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26018 int clock_var)
26020 int issue_rate = -1;
26021 int n_ready = *pn_ready;
26022 int i;
26023 rtx insn;
26024 int index = -1;
26026 /* Set up issue rate. */
26027 issue_rate = ix86_issue_rate ();
26029 /* Do reordering for BONNELL/SILVERMONT only. */
26030 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26031 return issue_rate;
26033 /* Nothing to do if ready list contains only 1 instruction. */
26034 if (n_ready <= 1)
26035 return issue_rate;
26037 /* Do reordering for the post-reload scheduler only. */
26038 if (!reload_completed)
26039 return issue_rate;
26041 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26043 if (sched_verbose > 1)
26044 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26045 INSN_UID (ready[index]));
26047 /* Put IMUL producer (ready[index]) at the top of ready list. */
26048 insn = ready[index];
26049 for (i = index; i < n_ready - 1; i++)
26050 ready[i] = ready[i + 1];
26051 ready[n_ready - 1] = insn;
26052 return issue_rate;
26054 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26056 if (sched_verbose > 1)
26057 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26058 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26059 /* Swap 2 top elements of ready list. */
26060 insn = ready[n_ready - 1];
26061 ready[n_ready - 1] = ready[n_ready - 2];
26062 ready[n_ready - 2] = insn;
26064 return issue_rate;
26067 static bool
26068 ix86_class_likely_spilled_p (reg_class_t);
26070 /* Return true if the lhs of insn is a HW function argument register; set
26071 is_spilled to true if it is a likely-spilled HW register. */
26072 static bool
26073 insn_is_function_arg (rtx insn, bool* is_spilled)
26075 rtx dst;
26077 if (!NONDEBUG_INSN_P (insn))
26078 return false;
26079 /* Call instructions are not movable; ignore them. */
26080 if (CALL_P (insn))
26081 return false;
26082 insn = PATTERN (insn);
26083 if (GET_CODE (insn) == PARALLEL)
26084 insn = XVECEXP (insn, 0, 0);
26085 if (GET_CODE (insn) != SET)
26086 return false;
26087 dst = SET_DEST (insn);
26088 if (REG_P (dst) && HARD_REGISTER_P (dst)
26089 && ix86_function_arg_regno_p (REGNO (dst)))
26091 /* Is it likely spilled HW register? */
26092 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26093 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26094 *is_spilled = true;
26095 return true;
26097 return false;
26100 /* Add output dependencies for a chain of adjacent function arguments, but
26101 only if there is a move to a likely-spilled HW register. Return the first
26102 argument if at least one dependence was added, or NULL otherwise. */
26103 static rtx
26104 add_parameter_dependencies (rtx call, rtx head)
26106 rtx insn;
26107 rtx last = call;
26108 rtx first_arg = NULL;
26109 bool is_spilled = false;
26111 head = PREV_INSN (head);
26113 /* Find the argument-passing instruction nearest to the call. */
26114 while (true)
26116 last = PREV_INSN (last);
26117 if (last == head)
26118 return NULL;
26119 if (!NONDEBUG_INSN_P (last))
26120 continue;
26121 if (insn_is_function_arg (last, &is_spilled))
26122 break;
26123 return NULL;
26126 first_arg = last;
26127 while (true)
26129 insn = PREV_INSN (last);
26130 if (!INSN_P (insn))
26131 break;
26132 if (insn == head)
26133 break;
26134 if (!NONDEBUG_INSN_P (insn))
26136 last = insn;
26137 continue;
26139 if (insn_is_function_arg (insn, &is_spilled))
26141 /* Add an output dependence between two function arguments if the chain
26142 of output arguments contains likely-spilled HW registers. */
26143 if (is_spilled)
26144 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26145 first_arg = last = insn;
26147 else
26148 break;
26150 if (!is_spilled)
26151 return NULL;
26152 return first_arg;
26155 /* Add output or anti dependency from insn to first_arg to restrict its code
26156 motion. */
26157 static void
26158 avoid_func_arg_motion (rtx first_arg, rtx insn)
26160 rtx set;
26161 rtx tmp;
26163 set = single_set (insn);
26164 if (!set)
26165 return;
26166 tmp = SET_DEST (set);
26167 if (REG_P (tmp))
26169 /* Add output dependency to the first function argument. */
26170 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26171 return;
26173 /* Add anti dependency. */
26174 add_dependence (first_arg, insn, REG_DEP_ANTI);
26177 /* Avoid cross-block motion of a function argument by adding a dependency
26178 from the first non-jump instruction in bb. */
26179 static void
26180 add_dependee_for_func_arg (rtx arg, basic_block bb)
26182 rtx insn = BB_END (bb);
26184 while (insn)
26186 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26188 rtx set = single_set (insn);
26189 if (set)
26191 avoid_func_arg_motion (arg, insn);
26192 return;
26195 if (insn == BB_HEAD (bb))
26196 return;
26197 insn = PREV_INSN (insn);
26201 /* Hook for pre-reload schedule - avoid motion of function arguments
26202 passed in likely spilled HW registers. */
26203 static void
26204 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26206 rtx insn;
26207 rtx first_arg = NULL;
26208 if (reload_completed)
26209 return;
26210 while (head != tail && DEBUG_INSN_P (head))
26211 head = NEXT_INSN (head);
26212 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26213 if (INSN_P (insn) && CALL_P (insn))
26215 first_arg = add_parameter_dependencies (insn, head);
26216 if (first_arg)
26218 /* Add a dependee for the first argument to predecessors, but only if the
26219 region contains more than one block. */
26220 basic_block bb = BLOCK_FOR_INSN (insn);
26221 int rgn = CONTAINING_RGN (bb->index);
26222 int nr_blks = RGN_NR_BLOCKS (rgn);
26223 /* Skip trivial regions and region head blocks that can have
26224 predecessors outside of region. */
26225 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26227 edge e;
26228 edge_iterator ei;
26229 /* Assume that region is SCC, i.e. all immediate predecessors
26230 of non-head block are in the same region. */
26231 FOR_EACH_EDGE (e, ei, bb->preds)
26233 /* Avoid creating loop-carried dependencies by
26234 using the topological ordering in the region. */
26235 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26236 add_dependee_for_func_arg (first_arg, e->src);
26239 insn = first_arg;
26240 if (insn == head)
26241 break;
26244 else if (first_arg)
26245 avoid_func_arg_motion (first_arg, insn);
26248 /* Hook for the pre-reload schedule - set the priority of moves from
26249 likely-spilled HW registers to the maximum, to schedule them as soon as
26250 possible. These are moves from function argument registers at the top of
26251 the function entry and moves from function return value registers after a call. */
26252 static int
26253 ix86_adjust_priority (rtx insn, int priority)
26255 rtx set;
26257 if (reload_completed)
26258 return priority;
26260 if (!NONDEBUG_INSN_P (insn))
26261 return priority;
26263 set = single_set (insn);
26264 if (set)
26266 rtx tmp = SET_SRC (set);
26267 if (REG_P (tmp)
26268 && HARD_REGISTER_P (tmp)
26269 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26270 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26271 return current_sched_info->sched_max_insns_priority;
26274 return priority;
26277 /* Model the decoder of Core 2/i7.
26278 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26279 track instruction fetch block boundaries and make sure that long
26280 (9+ byte) instructions are assigned to D0. */
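 /* Concretely, with the parameters set in ix86_sched_init_global below
    (16-byte ifetch block, at most 6 insns per cycle, 8-byte limit for
    the secondary decoders), the filtering hook masks out ready insns
    that would exceed any of these limits on the current cycle.  */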
26282 /* Maximum length of an insn that can be handled by
26283 a secondary decoder unit. '8' for Core 2/i7. */
26284 static int core2i7_secondary_decoder_max_insn_size;
26286 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26287 '16' for Core 2/i7. */
26288 static int core2i7_ifetch_block_size;
26290 /* Maximum number of instructions decoder can handle per cycle.
26291 '6' for Core 2/i7. */
26292 static int core2i7_ifetch_block_max_insns;
26294 typedef struct ix86_first_cycle_multipass_data_ *
26295 ix86_first_cycle_multipass_data_t;
26296 typedef const struct ix86_first_cycle_multipass_data_ *
26297 const_ix86_first_cycle_multipass_data_t;
26299 /* A variable to store target state across calls to max_issue within
26300 one cycle. */
26301 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26302 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26304 /* Initialize DATA. */
26305 static void
26306 core2i7_first_cycle_multipass_init (void *_data)
26308 ix86_first_cycle_multipass_data_t data
26309 = (ix86_first_cycle_multipass_data_t) _data;
26311 data->ifetch_block_len = 0;
26312 data->ifetch_block_n_insns = 0;
26313 data->ready_try_change = NULL;
26314 data->ready_try_change_size = 0;
26317 /* Advancing the cycle; reset ifetch block counts. */
26318 static void
26319 core2i7_dfa_post_advance_cycle (void)
26321 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26323 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26325 data->ifetch_block_len = 0;
26326 data->ifetch_block_n_insns = 0;
26329 static int min_insn_size (rtx);
26331 /* Filter out insns from ready_try that the core will not be able to issue
26332 on current cycle due to decoder. */
26333 static void
26334 core2i7_first_cycle_multipass_filter_ready_try
26335 (const_ix86_first_cycle_multipass_data_t data,
26336 char *ready_try, int n_ready, bool first_cycle_insn_p)
26338 while (n_ready--)
26340 rtx insn;
26341 int insn_size;
26343 if (ready_try[n_ready])
26344 continue;
26346 insn = get_ready_element (n_ready);
26347 insn_size = min_insn_size (insn);
26349 if (/* If this is too long an insn for a secondary decoder ... */
26350 (!first_cycle_insn_p
26351 && insn_size > core2i7_secondary_decoder_max_insn_size)
26352 /* ... or it would not fit into the ifetch block ... */
26353 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26354 /* ... or the decoder is full already ... */
26355 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26356 /* ... mask the insn out. */
26358 ready_try[n_ready] = 1;
26360 if (data->ready_try_change)
26361 bitmap_set_bit (data->ready_try_change, n_ready);
26366 /* Prepare for a new round of multipass lookahead scheduling. */
26367 static void
26368 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26369 bool first_cycle_insn_p)
26371 ix86_first_cycle_multipass_data_t data
26372 = (ix86_first_cycle_multipass_data_t) _data;
26373 const_ix86_first_cycle_multipass_data_t prev_data
26374 = ix86_first_cycle_multipass_data;
26376 /* Restore the state from the end of the previous round. */
26377 data->ifetch_block_len = prev_data->ifetch_block_len;
26378 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26380 /* Filter instructions that cannot be issued on current cycle due to
26381 decoder restrictions. */
26382 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26383 first_cycle_insn_p);
26386 /* INSN is being issued in current solution. Account for its impact on
26387 the decoder model. */
26388 static void
26389 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26390 rtx insn, const void *_prev_data)
26392 ix86_first_cycle_multipass_data_t data
26393 = (ix86_first_cycle_multipass_data_t) _data;
26394 const_ix86_first_cycle_multipass_data_t prev_data
26395 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26397 int insn_size = min_insn_size (insn);
26399 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26400 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26401 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26402 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26404 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26405 if (!data->ready_try_change)
26407 data->ready_try_change = sbitmap_alloc (n_ready);
26408 data->ready_try_change_size = n_ready;
26410 else if (data->ready_try_change_size < n_ready)
26412 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26413 n_ready, 0);
26414 data->ready_try_change_size = n_ready;
26416 bitmap_clear (data->ready_try_change);
26418 /* Filter out insns from ready_try that the core will not be able to issue
26419 on current cycle due to decoder. */
26420 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26421 false);
26424 /* Revert the effect on ready_try. */
26425 static void
26426 core2i7_first_cycle_multipass_backtrack (const void *_data,
26427 char *ready_try,
26428 int n_ready ATTRIBUTE_UNUSED)
26430 const_ix86_first_cycle_multipass_data_t data
26431 = (const_ix86_first_cycle_multipass_data_t) _data;
26432 unsigned int i = 0;
26433 sbitmap_iterator sbi;
26435 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26436 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26438 ready_try[i] = 0;
26442 /* Save the result of multipass lookahead scheduling for the next round. */
26443 static void
26444 core2i7_first_cycle_multipass_end (const void *_data)
26446 const_ix86_first_cycle_multipass_data_t data
26447 = (const_ix86_first_cycle_multipass_data_t) _data;
26448 ix86_first_cycle_multipass_data_t next_data
26449 = ix86_first_cycle_multipass_data;
26451 if (data != NULL)
26453 next_data->ifetch_block_len = data->ifetch_block_len;
26454 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26458 /* Deallocate target data. */
26459 static void
26460 core2i7_first_cycle_multipass_fini (void *_data)
26462 ix86_first_cycle_multipass_data_t data
26463 = (ix86_first_cycle_multipass_data_t) _data;
26465 if (data->ready_try_change)
26467 sbitmap_free (data->ready_try_change);
26468 data->ready_try_change = NULL;
26469 data->ready_try_change_size = 0;
26473 /* Prepare for scheduling pass. */
26474 static void
26475 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26476 int verbose ATTRIBUTE_UNUSED,
26477 int max_uid ATTRIBUTE_UNUSED)
26479 /* Install scheduling hooks for the current CPU. Some of these hooks are used
26480 in time-critical parts of the scheduler, so we only set them up when
26481 they are actually used. */
26482 switch (ix86_tune)
26484 case PROCESSOR_CORE2:
26485 case PROCESSOR_NEHALEM:
26486 case PROCESSOR_SANDYBRIDGE:
26487 case PROCESSOR_HASWELL:
26488 /* Do not perform multipass scheduling for the pre-reload schedule
26489 to save compile time. */
26490 if (reload_completed)
26492 targetm.sched.dfa_post_advance_cycle
26493 = core2i7_dfa_post_advance_cycle;
26494 targetm.sched.first_cycle_multipass_init
26495 = core2i7_first_cycle_multipass_init;
26496 targetm.sched.first_cycle_multipass_begin
26497 = core2i7_first_cycle_multipass_begin;
26498 targetm.sched.first_cycle_multipass_issue
26499 = core2i7_first_cycle_multipass_issue;
26500 targetm.sched.first_cycle_multipass_backtrack
26501 = core2i7_first_cycle_multipass_backtrack;
26502 targetm.sched.first_cycle_multipass_end
26503 = core2i7_first_cycle_multipass_end;
26504 targetm.sched.first_cycle_multipass_fini
26505 = core2i7_first_cycle_multipass_fini;
26507 /* Set decoder parameters. */
26508 core2i7_secondary_decoder_max_insn_size = 8;
26509 core2i7_ifetch_block_size = 16;
26510 core2i7_ifetch_block_max_insns = 6;
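/* As an illustration of how these parameters are used (a reading of the code
   above, not part of the original comments): the model assumes a 16-byte
   instruction fetch block from which at most 6 insns are decoded per cycle,
   with the secondary decoders assumed to handle insns of at most 8 bytes.
   core2i7_first_cycle_multipass_issue above adds min_insn_size of each issued
   insn to the running fetch-block length and asserts that neither the 16-byte
   nor the 6-insn budget is ever exceeded. */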
26511 break;
26513 /* ... Fall through ... */
26514 default:
26515 targetm.sched.dfa_post_advance_cycle = NULL;
26516 targetm.sched.first_cycle_multipass_init = NULL;
26517 targetm.sched.first_cycle_multipass_begin = NULL;
26518 targetm.sched.first_cycle_multipass_issue = NULL;
26519 targetm.sched.first_cycle_multipass_backtrack = NULL;
26520 targetm.sched.first_cycle_multipass_end = NULL;
26521 targetm.sched.first_cycle_multipass_fini = NULL;
26522 break;
26527 /* Compute the alignment given to a constant that is being placed in memory.
26528 EXP is the constant and ALIGN is the alignment that the object would
26529 ordinarily have.
26530 The value of this function is used instead of that alignment to align
26531 the object. */
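/* For example, under the rules below a DFmode (double) constant gets at
   least 64-bit alignment, constants whose mode satisfies ALIGN_MODE_128 get
   128-bit alignment, and string literals of 31 or more bytes are aligned to
   a word boundary unless optimizing for size; anything else keeps ALIGN. */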
26534 ix86_constant_alignment (tree exp, int align)
26536 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26537 || TREE_CODE (exp) == INTEGER_CST)
26539 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26540 return 64;
26541 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26542 return 128;
26544 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26545 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26546 return BITS_PER_WORD;
26548 return align;
26551 /* Compute the alignment for a static variable.
26552 TYPE is the data type, and ALIGN is the alignment that
26553 the object would ordinarily have. The value of this function is used
26554 instead of that alignment to align the object. */
26557 ix86_data_alignment (tree type, int align, bool opt)
26559 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26560 for symbols from other compilation units or symbols that don't need
26561 to bind locally. In order to preserve some ABI compatibility with
26562 those compilers, ensure we don't decrease alignment from what we
26563 used to assume. */
26565 int max_align_compat
26566 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26568 /* A data structure whose size is equal to or greater than a cache line
26569 (64 bytes on the Pentium 4 and other recent Intel processors, including
26570 processors based on the Intel Core microarchitecture) should be aligned
26571 so that its base address is a multiple of the cache line size. */
26573 int max_align
26574 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26576 if (max_align < BITS_PER_WORD)
26577 max_align = BITS_PER_WORD;
26579 if (opt
26580 && AGGREGATE_TYPE_P (type)
26581 && TYPE_SIZE (type)
26582 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26584 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26585 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26586 && align < max_align_compat)
26587 align = max_align_compat;
26588 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26589 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26590 && align < max_align)
26591 align = max_align;
26594 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26595 to a 16-byte boundary. */
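/* For instance, a file-scope "double buf[4]" (32 bytes, so TYPE_SIZE is at
   least 128 bits) is raised to 128-bit alignment by the check below, while
   an 8-byte array keeps whatever alignment it already had. */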
26596 if (TARGET_64BIT)
26598 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26599 && TYPE_SIZE (type)
26600 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26601 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26602 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26603 return 128;
26606 if (!opt)
26607 return align;
26609 if (TREE_CODE (type) == ARRAY_TYPE)
26611 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26612 return 64;
26613 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26614 return 128;
26616 else if (TREE_CODE (type) == COMPLEX_TYPE)
26619 if (TYPE_MODE (type) == DCmode && align < 64)
26620 return 64;
26621 if ((TYPE_MODE (type) == XCmode
26622 || TYPE_MODE (type) == TCmode) && align < 128)
26623 return 128;
26625 else if ((TREE_CODE (type) == RECORD_TYPE
26626 || TREE_CODE (type) == UNION_TYPE
26627 || TREE_CODE (type) == QUAL_UNION_TYPE)
26628 && TYPE_FIELDS (type))
26630 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26631 return 64;
26632 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26633 return 128;
26635 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26636 || TREE_CODE (type) == INTEGER_TYPE)
26638 if (TYPE_MODE (type) == DFmode && align < 64)
26639 return 64;
26640 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26641 return 128;
26644 return align;
26647 /* Compute the alignment for a local variable or a stack slot. EXP is
26648 the data type or decl itself, MODE is the widest mode available and
26649 ALIGN is the alignment that the object would ordinarily have. The
26650 value of this macro is used instead of that alignment to align the
26651 object. */
26653 unsigned int
26654 ix86_local_alignment (tree exp, enum machine_mode mode,
26655 unsigned int align)
26657 tree type, decl;
26659 if (exp && DECL_P (exp))
26661 type = TREE_TYPE (exp);
26662 decl = exp;
26664 else
26666 type = exp;
26667 decl = NULL;
26670 /* Don't do dynamic stack realignment for long long objects with
26671 -mpreferred-stack-boundary=2. */
26672 if (!TARGET_64BIT
26673 && align == 64
26674 && ix86_preferred_stack_boundary < 64
26675 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26676 && (!type || !TYPE_USER_ALIGN (type))
26677 && (!decl || !DECL_USER_ALIGN (decl)))
26678 align = 32;
26680 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26681 register in MODE. We will return the larger of the XFmode and DFmode
26682 alignments. */
26683 if (!type)
26685 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26686 align = GET_MODE_ALIGNMENT (DFmode);
26687 return align;
26690 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26691 to a 16-byte boundary. The exact wording is:
26693 An array uses the same alignment as its elements, except that a local or
26694 global array variable of length at least 16 bytes or
26695 a C99 variable-length array variable always has alignment of at least 16 bytes.
26697 This was added to allow the use of aligned SSE instructions on arrays. The
26698 rule is meant for static storage (where the compiler cannot do the analysis
26699 by itself). We follow it for automatic variables only when convenient:
26700 we fully control everything in the function being compiled, and functions
26701 from other units cannot rely on the alignment.
26703 Exclude the va_list type. It is the common case of a local array where
26704 we cannot benefit from the alignment.
26706 TODO: Probably one should optimize for size only when the variable does not escape. */
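/* As a concrete example of the test below: in a 64-bit function optimized
   for speed with SSE enabled, a local "char buf[32]" (at least 16 bytes) is
   given 128-bit alignment, whereas a local va_list object is deliberately
   left at its default alignment. */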
26707 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26708 && TARGET_SSE)
26710 if (AGGREGATE_TYPE_P (type)
26711 && (va_list_type_node == NULL_TREE
26712 || (TYPE_MAIN_VARIANT (type)
26713 != TYPE_MAIN_VARIANT (va_list_type_node)))
26714 && TYPE_SIZE (type)
26715 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26716 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26717 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26718 return 128;
26720 if (TREE_CODE (type) == ARRAY_TYPE)
26722 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26723 return 64;
26724 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26725 return 128;
26727 else if (TREE_CODE (type) == COMPLEX_TYPE)
26729 if (TYPE_MODE (type) == DCmode && align < 64)
26730 return 64;
26731 if ((TYPE_MODE (type) == XCmode
26732 || TYPE_MODE (type) == TCmode) && align < 128)
26733 return 128;
26735 else if ((TREE_CODE (type) == RECORD_TYPE
26736 || TREE_CODE (type) == UNION_TYPE
26737 || TREE_CODE (type) == QUAL_UNION_TYPE)
26738 && TYPE_FIELDS (type))
26740 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26741 return 64;
26742 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26743 return 128;
26745 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26746 || TREE_CODE (type) == INTEGER_TYPE)
26749 if (TYPE_MODE (type) == DFmode && align < 64)
26750 return 64;
26751 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26752 return 128;
26754 return align;
26757 /* Compute the minimum required alignment for dynamic stack realignment
26758 purposes for a local variable, parameter or a stack slot. EXP is
26759 the data type or decl itself, MODE is its mode and ALIGN is the
26760 alignment that the object would ordinarily have. */
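/* Concretely: with -m32 and -mpreferred-stack-boundary=2, a DImode (long
   long) local that would ordinarily want 64-bit alignment only requires
   32 bits here, so it does not by itself force dynamic stack realignment. */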
26762 unsigned int
26763 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26764 unsigned int align)
26766 tree type, decl;
26768 if (exp && DECL_P (exp))
26770 type = TREE_TYPE (exp);
26771 decl = exp;
26773 else
26775 type = exp;
26776 decl = NULL;
26779 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26780 return align;
26782 /* Don't do dynamic stack realignment for long long objects with
26783 -mpreferred-stack-boundary=2. */
26784 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26785 && (!type || !TYPE_USER_ALIGN (type))
26786 && (!decl || !DECL_USER_ALIGN (decl)))
26787 return 32;
26789 return align;
26792 /* Find a location for the static chain incoming to a nested function.
26793 This is a register, unless all free registers are used by arguments. */
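/* In summary of the cases below: 64-bit code always uses R10.  32-bit code
   defaults to ECX, uses EAX for fastcall and thiscall functions, and for
   regparm(3) functions (no free call-clobbered register) has the trampoline
   push the chain on the stack, using ESI and an alternate entry point so the
   nested function sees a consistent layout. */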
26795 static rtx
26796 ix86_static_chain (const_tree fndecl, bool incoming_p)
26798 unsigned regno;
26800 if (!DECL_STATIC_CHAIN (fndecl))
26801 return NULL;
26803 if (TARGET_64BIT)
26805 /* We always use R10 in 64-bit mode. */
26806 regno = R10_REG;
26808 else
26810 tree fntype;
26811 unsigned int ccvt;
26813 /* By default in 32-bit mode we use ECX to pass the static chain. */
26814 regno = CX_REG;
26816 fntype = TREE_TYPE (fndecl);
26817 ccvt = ix86_get_callcvt (fntype);
26818 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26820 /* Fastcall functions use ecx/edx for arguments, which leaves
26821 us with EAX for the static chain.
26822 Thiscall functions use ecx for arguments, which also
26823 leaves us with EAX for the static chain. */
26824 regno = AX_REG;
26826 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26828 /* Thiscall functions use ecx for arguments, which leaves
26829 us with EAX and EDX for the static chain.
26830 For ABI compatibility we use EAX. */
26831 regno = AX_REG;
26833 else if (ix86_function_regparm (fntype, fndecl) == 3)
26835 /* For regparm 3, we have no free call-clobbered registers in
26836 which to store the static chain. In order to implement this,
26837 we have the trampoline push the static chain to the stack.
26838 However, we can't push a value below the return address when
26839 we call the nested function directly, so we have to use an
26840 alternate entry point. For this we use ESI, and have the
26841 alternate entry point push ESI, so that things appear the
26842 same once we're executing the nested function. */
26843 if (incoming_p)
26845 if (fndecl == current_function_decl)
26846 ix86_static_chain_on_stack = true;
26847 return gen_frame_mem (SImode,
26848 plus_constant (Pmode,
26849 arg_pointer_rtx, -8));
26851 regno = SI_REG;
26855 return gen_rtx_REG (Pmode, regno);
26858 /* Emit RTL insns to initialize the variable parts of a trampoline.
26859 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26860 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26861 to be passed to the target function. */
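/* A sketch of the code this emits, derived from the opcodes used below.  The
   64-bit form shown is the movabs variant, used when the target address does
   not fit in 32 bits (otherwise the shorter 41 bb / 41 ba movl encodings are
   emitted):

     64-bit:  49 bb <fnaddr, 8 bytes>    movabs $fnaddr, %r11
              49 ba <chain, 8 bytes>     movabs $chain, %r10
              49 ff e3 90                jmp *%r11; nop (pads to a 32-bit store)

     32-bit:  b8/b9/68 <chain, 4 bytes>  mov $chain, %eax/%ecx  or  push $chain
              e9 <rel32>                 jmp <fnaddr>  */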
26863 static void
26864 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26866 rtx mem, fnaddr;
26867 int opcode;
26868 int offset = 0;
26870 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26872 if (TARGET_64BIT)
26874 int size;
26876 /* Load the function address into r11. Try to load the address using
26877 the shorter movl instead of movabs. We may want to support
26878 movq for kernel mode, but the kernel does not use trampolines at
26879 the moment. FNADDR is a 32-bit address and may not be in
26880 DImode when ptr_mode == SImode. Always use movl in this
26881 case. */
26882 if (ptr_mode == SImode
26883 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26885 fnaddr = copy_addr_to_reg (fnaddr);
26887 mem = adjust_address (m_tramp, HImode, offset);
26888 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26890 mem = adjust_address (m_tramp, SImode, offset + 2);
26891 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26892 offset += 6;
26894 else
26896 mem = adjust_address (m_tramp, HImode, offset);
26897 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26899 mem = adjust_address (m_tramp, DImode, offset + 2);
26900 emit_move_insn (mem, fnaddr);
26901 offset += 10;
26904 /* Load the static chain into r10 using movabs. Use the shorter movl
26905 instead of movabs when ptr_mode == SImode. */
26906 if (ptr_mode == SImode)
26908 opcode = 0xba41;
26909 size = 6;
26911 else
26913 opcode = 0xba49;
26914 size = 10;
26917 mem = adjust_address (m_tramp, HImode, offset);
26918 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26920 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26921 emit_move_insn (mem, chain_value);
26922 offset += size;
26924 /* Jump to r11; the last (unused) byte is a nop, only there to
26925 pad the write out to a single 32-bit store. */
26926 mem = adjust_address (m_tramp, SImode, offset);
26927 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26928 offset += 4;
26930 else
26932 rtx disp, chain;
26934 /* Depending on the static chain location, either load a register
26935 with a constant, or push the constant to the stack. All of the
26936 instructions are the same size. */
26937 chain = ix86_static_chain (fndecl, true);
26938 if (REG_P (chain))
26940 switch (REGNO (chain))
26942 case AX_REG:
26943 opcode = 0xb8; break;
26944 case CX_REG:
26945 opcode = 0xb9; break;
26946 default:
26947 gcc_unreachable ();
26950 else
26951 opcode = 0x68;
26953 mem = adjust_address (m_tramp, QImode, offset);
26954 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26956 mem = adjust_address (m_tramp, SImode, offset + 1);
26957 emit_move_insn (mem, chain_value);
26958 offset += 5;
26960 mem = adjust_address (m_tramp, QImode, offset);
26961 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26963 mem = adjust_address (m_tramp, SImode, offset + 1);
26965 /* Compute the offset from the end of the jmp to the target function.
26966 When the trampoline stores the static chain on the stack, we need
26967 to skip the first insn, which pushes the (call-saved) register used
26968 for the static chain; this push is 1 byte. */
26969 offset += 5;
26970 disp = expand_binop (SImode, sub_optab, fnaddr,
26971 plus_constant (Pmode, XEXP (m_tramp, 0),
26972 offset - (MEM_P (chain) ? 1 : 0)),
26973 NULL_RTX, 1, OPTAB_DIRECT);
26974 emit_move_insn (mem, disp);
26977 gcc_assert (offset <= TRAMPOLINE_SIZE);
26979 #ifdef HAVE_ENABLE_EXECUTE_STACK
26980 #ifdef CHECK_EXECUTE_STACK_ENABLED
26981 if (CHECK_EXECUTE_STACK_ENABLED)
26982 #endif
26983 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26984 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26985 #endif
26988 /* The following file contains several enumerations and data structures
26989 built from the definitions in i386-builtin-types.def. */
26991 #include "i386-builtin-types.inc"
26993 /* Table for the ix86 builtin non-function types. */
26994 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
26996 /* Retrieve an element from the above table, building some of
26997 the types lazily. */
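/* For example, a vector type code (those between IX86_BT_LAST_PRIM and
   IX86_BT_LAST_VECT) is resolved by looking up its element type and machine
   mode in the generated tables and calling build_vector_type_for_mode, while
   pointer type codes wrap their base type with build_pointer_type, adding a
   const qualifier for codes past IX86_BT_LAST_PTR.  The result is cached in
   ix86_builtin_type_tab so each type is built only once. */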
26999 static tree
27000 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27002 unsigned int index;
27003 tree type, itype;
27005 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27007 type = ix86_builtin_type_tab[(int) tcode];
27008 if (type != NULL)
27009 return type;
27011 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27012 if (tcode <= IX86_BT_LAST_VECT)
27014 enum machine_mode mode;
27016 index = tcode - IX86_BT_LAST_PRIM - 1;
27017 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27018 mode = ix86_builtin_type_vect_mode[index];
27020 type = build_vector_type_for_mode (itype, mode);
27022 else
27024 int quals;
27026 index = tcode - IX86_BT_LAST_VECT - 1;
27027 if (tcode <= IX86_BT_LAST_PTR)
27028 quals = TYPE_UNQUALIFIED;
27029 else
27030 quals = TYPE_QUAL_CONST;
27032 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27033 if (quals != TYPE_UNQUALIFIED)
27034 itype = build_qualified_type (itype, quals);
27036 type = build_pointer_type (itype);
27039 ix86_builtin_type_tab[(int) tcode] = type;
27040 return type;
27043 /* Table for the ix86 builtin function types. */
27044 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27046 /* Retrieve an element from the above table, building some of
27047 the types lazily. */
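/* Codes up to IX86_BT_LAST_FUNC are expanded from the generated
   ix86_builtin_func_start / ix86_builtin_func_args tables, where the first
   listed entry is the return type and the remaining entries are the argument
   types; codes beyond that are aliases resolved via
   ix86_builtin_func_alias_base.  As above, results are cached so each
   function type is built only once. */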
27049 static tree
27050 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27052 tree type;
27054 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27056 type = ix86_builtin_func_type_tab[(int) tcode];
27057 if (type != NULL)
27058 return type;
27060 if (tcode <= IX86_BT_LAST_FUNC)
27062 unsigned start = ix86_builtin_func_start[(int) tcode];
27063 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27064 tree rtype, atype, args = void_list_node;
27065 unsigned i;
27067 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27068 for (i = after - 1; i > start; --i)
27070 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27071 args = tree_cons (NULL, atype, args);
27074 type = build_function_type (rtype, args);
27076 else
27078 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27079 enum ix86_builtin_func_type icode;
27081 icode = ix86_builtin_func_alias_base[index];
27082 type = ix86_get_builtin_func_type (icode);
27085 ix86_builtin_func_type_tab[(int) tcode] = type;
27086 return type;
27090 /* Codes for all the SSE/MMX builtins. */
27091 enum ix86_builtins
27093 IX86_BUILTIN_ADDPS,
27094 IX86_BUILTIN_ADDSS,
27095 IX86_BUILTIN_DIVPS,
27096 IX86_BUILTIN_DIVSS,
27097 IX86_BUILTIN_MULPS,
27098 IX86_BUILTIN_MULSS,
27099 IX86_BUILTIN_SUBPS,
27100 IX86_BUILTIN_SUBSS,
27102 IX86_BUILTIN_CMPEQPS,
27103 IX86_BUILTIN_CMPLTPS,
27104 IX86_BUILTIN_CMPLEPS,
27105 IX86_BUILTIN_CMPGTPS,
27106 IX86_BUILTIN_CMPGEPS,
27107 IX86_BUILTIN_CMPNEQPS,
27108 IX86_BUILTIN_CMPNLTPS,
27109 IX86_BUILTIN_CMPNLEPS,
27110 IX86_BUILTIN_CMPNGTPS,
27111 IX86_BUILTIN_CMPNGEPS,
27112 IX86_BUILTIN_CMPORDPS,
27113 IX86_BUILTIN_CMPUNORDPS,
27114 IX86_BUILTIN_CMPEQSS,
27115 IX86_BUILTIN_CMPLTSS,
27116 IX86_BUILTIN_CMPLESS,
27117 IX86_BUILTIN_CMPNEQSS,
27118 IX86_BUILTIN_CMPNLTSS,
27119 IX86_BUILTIN_CMPNLESS,
27120 IX86_BUILTIN_CMPORDSS,
27121 IX86_BUILTIN_CMPUNORDSS,
27123 IX86_BUILTIN_COMIEQSS,
27124 IX86_BUILTIN_COMILTSS,
27125 IX86_BUILTIN_COMILESS,
27126 IX86_BUILTIN_COMIGTSS,
27127 IX86_BUILTIN_COMIGESS,
27128 IX86_BUILTIN_COMINEQSS,
27129 IX86_BUILTIN_UCOMIEQSS,
27130 IX86_BUILTIN_UCOMILTSS,
27131 IX86_BUILTIN_UCOMILESS,
27132 IX86_BUILTIN_UCOMIGTSS,
27133 IX86_BUILTIN_UCOMIGESS,
27134 IX86_BUILTIN_UCOMINEQSS,
27136 IX86_BUILTIN_CVTPI2PS,
27137 IX86_BUILTIN_CVTPS2PI,
27138 IX86_BUILTIN_CVTSI2SS,
27139 IX86_BUILTIN_CVTSI642SS,
27140 IX86_BUILTIN_CVTSS2SI,
27141 IX86_BUILTIN_CVTSS2SI64,
27142 IX86_BUILTIN_CVTTPS2PI,
27143 IX86_BUILTIN_CVTTSS2SI,
27144 IX86_BUILTIN_CVTTSS2SI64,
27146 IX86_BUILTIN_MAXPS,
27147 IX86_BUILTIN_MAXSS,
27148 IX86_BUILTIN_MINPS,
27149 IX86_BUILTIN_MINSS,
27151 IX86_BUILTIN_LOADUPS,
27152 IX86_BUILTIN_STOREUPS,
27153 IX86_BUILTIN_MOVSS,
27155 IX86_BUILTIN_MOVHLPS,
27156 IX86_BUILTIN_MOVLHPS,
27157 IX86_BUILTIN_LOADHPS,
27158 IX86_BUILTIN_LOADLPS,
27159 IX86_BUILTIN_STOREHPS,
27160 IX86_BUILTIN_STORELPS,
27162 IX86_BUILTIN_MASKMOVQ,
27163 IX86_BUILTIN_MOVMSKPS,
27164 IX86_BUILTIN_PMOVMSKB,
27166 IX86_BUILTIN_MOVNTPS,
27167 IX86_BUILTIN_MOVNTQ,
27169 IX86_BUILTIN_LOADDQU,
27170 IX86_BUILTIN_STOREDQU,
27172 IX86_BUILTIN_PACKSSWB,
27173 IX86_BUILTIN_PACKSSDW,
27174 IX86_BUILTIN_PACKUSWB,
27176 IX86_BUILTIN_PADDB,
27177 IX86_BUILTIN_PADDW,
27178 IX86_BUILTIN_PADDD,
27179 IX86_BUILTIN_PADDQ,
27180 IX86_BUILTIN_PADDSB,
27181 IX86_BUILTIN_PADDSW,
27182 IX86_BUILTIN_PADDUSB,
27183 IX86_BUILTIN_PADDUSW,
27184 IX86_BUILTIN_PSUBB,
27185 IX86_BUILTIN_PSUBW,
27186 IX86_BUILTIN_PSUBD,
27187 IX86_BUILTIN_PSUBQ,
27188 IX86_BUILTIN_PSUBSB,
27189 IX86_BUILTIN_PSUBSW,
27190 IX86_BUILTIN_PSUBUSB,
27191 IX86_BUILTIN_PSUBUSW,
27193 IX86_BUILTIN_PAND,
27194 IX86_BUILTIN_PANDN,
27195 IX86_BUILTIN_POR,
27196 IX86_BUILTIN_PXOR,
27198 IX86_BUILTIN_PAVGB,
27199 IX86_BUILTIN_PAVGW,
27201 IX86_BUILTIN_PCMPEQB,
27202 IX86_BUILTIN_PCMPEQW,
27203 IX86_BUILTIN_PCMPEQD,
27204 IX86_BUILTIN_PCMPGTB,
27205 IX86_BUILTIN_PCMPGTW,
27206 IX86_BUILTIN_PCMPGTD,
27208 IX86_BUILTIN_PMADDWD,
27210 IX86_BUILTIN_PMAXSW,
27211 IX86_BUILTIN_PMAXUB,
27212 IX86_BUILTIN_PMINSW,
27213 IX86_BUILTIN_PMINUB,
27215 IX86_BUILTIN_PMULHUW,
27216 IX86_BUILTIN_PMULHW,
27217 IX86_BUILTIN_PMULLW,
27219 IX86_BUILTIN_PSADBW,
27220 IX86_BUILTIN_PSHUFW,
27222 IX86_BUILTIN_PSLLW,
27223 IX86_BUILTIN_PSLLD,
27224 IX86_BUILTIN_PSLLQ,
27225 IX86_BUILTIN_PSRAW,
27226 IX86_BUILTIN_PSRAD,
27227 IX86_BUILTIN_PSRLW,
27228 IX86_BUILTIN_PSRLD,
27229 IX86_BUILTIN_PSRLQ,
27230 IX86_BUILTIN_PSLLWI,
27231 IX86_BUILTIN_PSLLDI,
27232 IX86_BUILTIN_PSLLQI,
27233 IX86_BUILTIN_PSRAWI,
27234 IX86_BUILTIN_PSRADI,
27235 IX86_BUILTIN_PSRLWI,
27236 IX86_BUILTIN_PSRLDI,
27237 IX86_BUILTIN_PSRLQI,
27239 IX86_BUILTIN_PUNPCKHBW,
27240 IX86_BUILTIN_PUNPCKHWD,
27241 IX86_BUILTIN_PUNPCKHDQ,
27242 IX86_BUILTIN_PUNPCKLBW,
27243 IX86_BUILTIN_PUNPCKLWD,
27244 IX86_BUILTIN_PUNPCKLDQ,
27246 IX86_BUILTIN_SHUFPS,
27248 IX86_BUILTIN_RCPPS,
27249 IX86_BUILTIN_RCPSS,
27250 IX86_BUILTIN_RSQRTPS,
27251 IX86_BUILTIN_RSQRTPS_NR,
27252 IX86_BUILTIN_RSQRTSS,
27253 IX86_BUILTIN_RSQRTF,
27254 IX86_BUILTIN_SQRTPS,
27255 IX86_BUILTIN_SQRTPS_NR,
27256 IX86_BUILTIN_SQRTSS,
27258 IX86_BUILTIN_UNPCKHPS,
27259 IX86_BUILTIN_UNPCKLPS,
27261 IX86_BUILTIN_ANDPS,
27262 IX86_BUILTIN_ANDNPS,
27263 IX86_BUILTIN_ORPS,
27264 IX86_BUILTIN_XORPS,
27266 IX86_BUILTIN_EMMS,
27267 IX86_BUILTIN_LDMXCSR,
27268 IX86_BUILTIN_STMXCSR,
27269 IX86_BUILTIN_SFENCE,
27271 IX86_BUILTIN_FXSAVE,
27272 IX86_BUILTIN_FXRSTOR,
27273 IX86_BUILTIN_FXSAVE64,
27274 IX86_BUILTIN_FXRSTOR64,
27276 IX86_BUILTIN_XSAVE,
27277 IX86_BUILTIN_XRSTOR,
27278 IX86_BUILTIN_XSAVE64,
27279 IX86_BUILTIN_XRSTOR64,
27281 IX86_BUILTIN_XSAVEOPT,
27282 IX86_BUILTIN_XSAVEOPT64,
27284 /* 3DNow! Original */
27285 IX86_BUILTIN_FEMMS,
27286 IX86_BUILTIN_PAVGUSB,
27287 IX86_BUILTIN_PF2ID,
27288 IX86_BUILTIN_PFACC,
27289 IX86_BUILTIN_PFADD,
27290 IX86_BUILTIN_PFCMPEQ,
27291 IX86_BUILTIN_PFCMPGE,
27292 IX86_BUILTIN_PFCMPGT,
27293 IX86_BUILTIN_PFMAX,
27294 IX86_BUILTIN_PFMIN,
27295 IX86_BUILTIN_PFMUL,
27296 IX86_BUILTIN_PFRCP,
27297 IX86_BUILTIN_PFRCPIT1,
27298 IX86_BUILTIN_PFRCPIT2,
27299 IX86_BUILTIN_PFRSQIT1,
27300 IX86_BUILTIN_PFRSQRT,
27301 IX86_BUILTIN_PFSUB,
27302 IX86_BUILTIN_PFSUBR,
27303 IX86_BUILTIN_PI2FD,
27304 IX86_BUILTIN_PMULHRW,
27306 /* 3DNow! Athlon Extensions */
27307 IX86_BUILTIN_PF2IW,
27308 IX86_BUILTIN_PFNACC,
27309 IX86_BUILTIN_PFPNACC,
27310 IX86_BUILTIN_PI2FW,
27311 IX86_BUILTIN_PSWAPDSI,
27312 IX86_BUILTIN_PSWAPDSF,
27314 /* SSE2 */
27315 IX86_BUILTIN_ADDPD,
27316 IX86_BUILTIN_ADDSD,
27317 IX86_BUILTIN_DIVPD,
27318 IX86_BUILTIN_DIVSD,
27319 IX86_BUILTIN_MULPD,
27320 IX86_BUILTIN_MULSD,
27321 IX86_BUILTIN_SUBPD,
27322 IX86_BUILTIN_SUBSD,
27324 IX86_BUILTIN_CMPEQPD,
27325 IX86_BUILTIN_CMPLTPD,
27326 IX86_BUILTIN_CMPLEPD,
27327 IX86_BUILTIN_CMPGTPD,
27328 IX86_BUILTIN_CMPGEPD,
27329 IX86_BUILTIN_CMPNEQPD,
27330 IX86_BUILTIN_CMPNLTPD,
27331 IX86_BUILTIN_CMPNLEPD,
27332 IX86_BUILTIN_CMPNGTPD,
27333 IX86_BUILTIN_CMPNGEPD,
27334 IX86_BUILTIN_CMPORDPD,
27335 IX86_BUILTIN_CMPUNORDPD,
27336 IX86_BUILTIN_CMPEQSD,
27337 IX86_BUILTIN_CMPLTSD,
27338 IX86_BUILTIN_CMPLESD,
27339 IX86_BUILTIN_CMPNEQSD,
27340 IX86_BUILTIN_CMPNLTSD,
27341 IX86_BUILTIN_CMPNLESD,
27342 IX86_BUILTIN_CMPORDSD,
27343 IX86_BUILTIN_CMPUNORDSD,
27345 IX86_BUILTIN_COMIEQSD,
27346 IX86_BUILTIN_COMILTSD,
27347 IX86_BUILTIN_COMILESD,
27348 IX86_BUILTIN_COMIGTSD,
27349 IX86_BUILTIN_COMIGESD,
27350 IX86_BUILTIN_COMINEQSD,
27351 IX86_BUILTIN_UCOMIEQSD,
27352 IX86_BUILTIN_UCOMILTSD,
27353 IX86_BUILTIN_UCOMILESD,
27354 IX86_BUILTIN_UCOMIGTSD,
27355 IX86_BUILTIN_UCOMIGESD,
27356 IX86_BUILTIN_UCOMINEQSD,
27358 IX86_BUILTIN_MAXPD,
27359 IX86_BUILTIN_MAXSD,
27360 IX86_BUILTIN_MINPD,
27361 IX86_BUILTIN_MINSD,
27363 IX86_BUILTIN_ANDPD,
27364 IX86_BUILTIN_ANDNPD,
27365 IX86_BUILTIN_ORPD,
27366 IX86_BUILTIN_XORPD,
27368 IX86_BUILTIN_SQRTPD,
27369 IX86_BUILTIN_SQRTSD,
27371 IX86_BUILTIN_UNPCKHPD,
27372 IX86_BUILTIN_UNPCKLPD,
27374 IX86_BUILTIN_SHUFPD,
27376 IX86_BUILTIN_LOADUPD,
27377 IX86_BUILTIN_STOREUPD,
27378 IX86_BUILTIN_MOVSD,
27380 IX86_BUILTIN_LOADHPD,
27381 IX86_BUILTIN_LOADLPD,
27383 IX86_BUILTIN_CVTDQ2PD,
27384 IX86_BUILTIN_CVTDQ2PS,
27386 IX86_BUILTIN_CVTPD2DQ,
27387 IX86_BUILTIN_CVTPD2PI,
27388 IX86_BUILTIN_CVTPD2PS,
27389 IX86_BUILTIN_CVTTPD2DQ,
27390 IX86_BUILTIN_CVTTPD2PI,
27392 IX86_BUILTIN_CVTPI2PD,
27393 IX86_BUILTIN_CVTSI2SD,
27394 IX86_BUILTIN_CVTSI642SD,
27396 IX86_BUILTIN_CVTSD2SI,
27397 IX86_BUILTIN_CVTSD2SI64,
27398 IX86_BUILTIN_CVTSD2SS,
27399 IX86_BUILTIN_CVTSS2SD,
27400 IX86_BUILTIN_CVTTSD2SI,
27401 IX86_BUILTIN_CVTTSD2SI64,
27403 IX86_BUILTIN_CVTPS2DQ,
27404 IX86_BUILTIN_CVTPS2PD,
27405 IX86_BUILTIN_CVTTPS2DQ,
27407 IX86_BUILTIN_MOVNTI,
27408 IX86_BUILTIN_MOVNTI64,
27409 IX86_BUILTIN_MOVNTPD,
27410 IX86_BUILTIN_MOVNTDQ,
27412 IX86_BUILTIN_MOVQ128,
27414 /* SSE2 MMX */
27415 IX86_BUILTIN_MASKMOVDQU,
27416 IX86_BUILTIN_MOVMSKPD,
27417 IX86_BUILTIN_PMOVMSKB128,
27419 IX86_BUILTIN_PACKSSWB128,
27420 IX86_BUILTIN_PACKSSDW128,
27421 IX86_BUILTIN_PACKUSWB128,
27423 IX86_BUILTIN_PADDB128,
27424 IX86_BUILTIN_PADDW128,
27425 IX86_BUILTIN_PADDD128,
27426 IX86_BUILTIN_PADDQ128,
27427 IX86_BUILTIN_PADDSB128,
27428 IX86_BUILTIN_PADDSW128,
27429 IX86_BUILTIN_PADDUSB128,
27430 IX86_BUILTIN_PADDUSW128,
27431 IX86_BUILTIN_PSUBB128,
27432 IX86_BUILTIN_PSUBW128,
27433 IX86_BUILTIN_PSUBD128,
27434 IX86_BUILTIN_PSUBQ128,
27435 IX86_BUILTIN_PSUBSB128,
27436 IX86_BUILTIN_PSUBSW128,
27437 IX86_BUILTIN_PSUBUSB128,
27438 IX86_BUILTIN_PSUBUSW128,
27440 IX86_BUILTIN_PAND128,
27441 IX86_BUILTIN_PANDN128,
27442 IX86_BUILTIN_POR128,
27443 IX86_BUILTIN_PXOR128,
27445 IX86_BUILTIN_PAVGB128,
27446 IX86_BUILTIN_PAVGW128,
27448 IX86_BUILTIN_PCMPEQB128,
27449 IX86_BUILTIN_PCMPEQW128,
27450 IX86_BUILTIN_PCMPEQD128,
27451 IX86_BUILTIN_PCMPGTB128,
27452 IX86_BUILTIN_PCMPGTW128,
27453 IX86_BUILTIN_PCMPGTD128,
27455 IX86_BUILTIN_PMADDWD128,
27457 IX86_BUILTIN_PMAXSW128,
27458 IX86_BUILTIN_PMAXUB128,
27459 IX86_BUILTIN_PMINSW128,
27460 IX86_BUILTIN_PMINUB128,
27462 IX86_BUILTIN_PMULUDQ,
27463 IX86_BUILTIN_PMULUDQ128,
27464 IX86_BUILTIN_PMULHUW128,
27465 IX86_BUILTIN_PMULHW128,
27466 IX86_BUILTIN_PMULLW128,
27468 IX86_BUILTIN_PSADBW128,
27469 IX86_BUILTIN_PSHUFHW,
27470 IX86_BUILTIN_PSHUFLW,
27471 IX86_BUILTIN_PSHUFD,
27473 IX86_BUILTIN_PSLLDQI128,
27474 IX86_BUILTIN_PSLLWI128,
27475 IX86_BUILTIN_PSLLDI128,
27476 IX86_BUILTIN_PSLLQI128,
27477 IX86_BUILTIN_PSRAWI128,
27478 IX86_BUILTIN_PSRADI128,
27479 IX86_BUILTIN_PSRLDQI128,
27480 IX86_BUILTIN_PSRLWI128,
27481 IX86_BUILTIN_PSRLDI128,
27482 IX86_BUILTIN_PSRLQI128,
27484 IX86_BUILTIN_PSLLDQ128,
27485 IX86_BUILTIN_PSLLW128,
27486 IX86_BUILTIN_PSLLD128,
27487 IX86_BUILTIN_PSLLQ128,
27488 IX86_BUILTIN_PSRAW128,
27489 IX86_BUILTIN_PSRAD128,
27490 IX86_BUILTIN_PSRLW128,
27491 IX86_BUILTIN_PSRLD128,
27492 IX86_BUILTIN_PSRLQ128,
27494 IX86_BUILTIN_PUNPCKHBW128,
27495 IX86_BUILTIN_PUNPCKHWD128,
27496 IX86_BUILTIN_PUNPCKHDQ128,
27497 IX86_BUILTIN_PUNPCKHQDQ128,
27498 IX86_BUILTIN_PUNPCKLBW128,
27499 IX86_BUILTIN_PUNPCKLWD128,
27500 IX86_BUILTIN_PUNPCKLDQ128,
27501 IX86_BUILTIN_PUNPCKLQDQ128,
27503 IX86_BUILTIN_CLFLUSH,
27504 IX86_BUILTIN_MFENCE,
27505 IX86_BUILTIN_LFENCE,
27506 IX86_BUILTIN_PAUSE,
27508 IX86_BUILTIN_FNSTENV,
27509 IX86_BUILTIN_FLDENV,
27510 IX86_BUILTIN_FNSTSW,
27511 IX86_BUILTIN_FNCLEX,
27513 IX86_BUILTIN_BSRSI,
27514 IX86_BUILTIN_BSRDI,
27515 IX86_BUILTIN_RDPMC,
27516 IX86_BUILTIN_RDTSC,
27517 IX86_BUILTIN_RDTSCP,
27518 IX86_BUILTIN_ROLQI,
27519 IX86_BUILTIN_ROLHI,
27520 IX86_BUILTIN_RORQI,
27521 IX86_BUILTIN_RORHI,
27523 /* SSE3. */
27524 IX86_BUILTIN_ADDSUBPS,
27525 IX86_BUILTIN_HADDPS,
27526 IX86_BUILTIN_HSUBPS,
27527 IX86_BUILTIN_MOVSHDUP,
27528 IX86_BUILTIN_MOVSLDUP,
27529 IX86_BUILTIN_ADDSUBPD,
27530 IX86_BUILTIN_HADDPD,
27531 IX86_BUILTIN_HSUBPD,
27532 IX86_BUILTIN_LDDQU,
27534 IX86_BUILTIN_MONITOR,
27535 IX86_BUILTIN_MWAIT,
27537 /* SSSE3. */
27538 IX86_BUILTIN_PHADDW,
27539 IX86_BUILTIN_PHADDD,
27540 IX86_BUILTIN_PHADDSW,
27541 IX86_BUILTIN_PHSUBW,
27542 IX86_BUILTIN_PHSUBD,
27543 IX86_BUILTIN_PHSUBSW,
27544 IX86_BUILTIN_PMADDUBSW,
27545 IX86_BUILTIN_PMULHRSW,
27546 IX86_BUILTIN_PSHUFB,
27547 IX86_BUILTIN_PSIGNB,
27548 IX86_BUILTIN_PSIGNW,
27549 IX86_BUILTIN_PSIGND,
27550 IX86_BUILTIN_PALIGNR,
27551 IX86_BUILTIN_PABSB,
27552 IX86_BUILTIN_PABSW,
27553 IX86_BUILTIN_PABSD,
27555 IX86_BUILTIN_PHADDW128,
27556 IX86_BUILTIN_PHADDD128,
27557 IX86_BUILTIN_PHADDSW128,
27558 IX86_BUILTIN_PHSUBW128,
27559 IX86_BUILTIN_PHSUBD128,
27560 IX86_BUILTIN_PHSUBSW128,
27561 IX86_BUILTIN_PMADDUBSW128,
27562 IX86_BUILTIN_PMULHRSW128,
27563 IX86_BUILTIN_PSHUFB128,
27564 IX86_BUILTIN_PSIGNB128,
27565 IX86_BUILTIN_PSIGNW128,
27566 IX86_BUILTIN_PSIGND128,
27567 IX86_BUILTIN_PALIGNR128,
27568 IX86_BUILTIN_PABSB128,
27569 IX86_BUILTIN_PABSW128,
27570 IX86_BUILTIN_PABSD128,
27572 /* AMDFAM10 - SSE4A New Instructions. */
27573 IX86_BUILTIN_MOVNTSD,
27574 IX86_BUILTIN_MOVNTSS,
27575 IX86_BUILTIN_EXTRQI,
27576 IX86_BUILTIN_EXTRQ,
27577 IX86_BUILTIN_INSERTQI,
27578 IX86_BUILTIN_INSERTQ,
27580 /* SSE4.1. */
27581 IX86_BUILTIN_BLENDPD,
27582 IX86_BUILTIN_BLENDPS,
27583 IX86_BUILTIN_BLENDVPD,
27584 IX86_BUILTIN_BLENDVPS,
27585 IX86_BUILTIN_PBLENDVB128,
27586 IX86_BUILTIN_PBLENDW128,
27588 IX86_BUILTIN_DPPD,
27589 IX86_BUILTIN_DPPS,
27591 IX86_BUILTIN_INSERTPS128,
27593 IX86_BUILTIN_MOVNTDQA,
27594 IX86_BUILTIN_MPSADBW128,
27595 IX86_BUILTIN_PACKUSDW128,
27596 IX86_BUILTIN_PCMPEQQ,
27597 IX86_BUILTIN_PHMINPOSUW128,
27599 IX86_BUILTIN_PMAXSB128,
27600 IX86_BUILTIN_PMAXSD128,
27601 IX86_BUILTIN_PMAXUD128,
27602 IX86_BUILTIN_PMAXUW128,
27604 IX86_BUILTIN_PMINSB128,
27605 IX86_BUILTIN_PMINSD128,
27606 IX86_BUILTIN_PMINUD128,
27607 IX86_BUILTIN_PMINUW128,
27609 IX86_BUILTIN_PMOVSXBW128,
27610 IX86_BUILTIN_PMOVSXBD128,
27611 IX86_BUILTIN_PMOVSXBQ128,
27612 IX86_BUILTIN_PMOVSXWD128,
27613 IX86_BUILTIN_PMOVSXWQ128,
27614 IX86_BUILTIN_PMOVSXDQ128,
27616 IX86_BUILTIN_PMOVZXBW128,
27617 IX86_BUILTIN_PMOVZXBD128,
27618 IX86_BUILTIN_PMOVZXBQ128,
27619 IX86_BUILTIN_PMOVZXWD128,
27620 IX86_BUILTIN_PMOVZXWQ128,
27621 IX86_BUILTIN_PMOVZXDQ128,
27623 IX86_BUILTIN_PMULDQ128,
27624 IX86_BUILTIN_PMULLD128,
27626 IX86_BUILTIN_ROUNDSD,
27627 IX86_BUILTIN_ROUNDSS,
27629 IX86_BUILTIN_ROUNDPD,
27630 IX86_BUILTIN_ROUNDPS,
27632 IX86_BUILTIN_FLOORPD,
27633 IX86_BUILTIN_CEILPD,
27634 IX86_BUILTIN_TRUNCPD,
27635 IX86_BUILTIN_RINTPD,
27636 IX86_BUILTIN_ROUNDPD_AZ,
27638 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27639 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27640 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27642 IX86_BUILTIN_FLOORPS,
27643 IX86_BUILTIN_CEILPS,
27644 IX86_BUILTIN_TRUNCPS,
27645 IX86_BUILTIN_RINTPS,
27646 IX86_BUILTIN_ROUNDPS_AZ,
27648 IX86_BUILTIN_FLOORPS_SFIX,
27649 IX86_BUILTIN_CEILPS_SFIX,
27650 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27652 IX86_BUILTIN_PTESTZ,
27653 IX86_BUILTIN_PTESTC,
27654 IX86_BUILTIN_PTESTNZC,
27656 IX86_BUILTIN_VEC_INIT_V2SI,
27657 IX86_BUILTIN_VEC_INIT_V4HI,
27658 IX86_BUILTIN_VEC_INIT_V8QI,
27659 IX86_BUILTIN_VEC_EXT_V2DF,
27660 IX86_BUILTIN_VEC_EXT_V2DI,
27661 IX86_BUILTIN_VEC_EXT_V4SF,
27662 IX86_BUILTIN_VEC_EXT_V4SI,
27663 IX86_BUILTIN_VEC_EXT_V8HI,
27664 IX86_BUILTIN_VEC_EXT_V2SI,
27665 IX86_BUILTIN_VEC_EXT_V4HI,
27666 IX86_BUILTIN_VEC_EXT_V16QI,
27667 IX86_BUILTIN_VEC_SET_V2DI,
27668 IX86_BUILTIN_VEC_SET_V4SF,
27669 IX86_BUILTIN_VEC_SET_V4SI,
27670 IX86_BUILTIN_VEC_SET_V8HI,
27671 IX86_BUILTIN_VEC_SET_V4HI,
27672 IX86_BUILTIN_VEC_SET_V16QI,
27674 IX86_BUILTIN_VEC_PACK_SFIX,
27675 IX86_BUILTIN_VEC_PACK_SFIX256,
27677 /* SSE4.2. */
27678 IX86_BUILTIN_CRC32QI,
27679 IX86_BUILTIN_CRC32HI,
27680 IX86_BUILTIN_CRC32SI,
27681 IX86_BUILTIN_CRC32DI,
27683 IX86_BUILTIN_PCMPESTRI128,
27684 IX86_BUILTIN_PCMPESTRM128,
27685 IX86_BUILTIN_PCMPESTRA128,
27686 IX86_BUILTIN_PCMPESTRC128,
27687 IX86_BUILTIN_PCMPESTRO128,
27688 IX86_BUILTIN_PCMPESTRS128,
27689 IX86_BUILTIN_PCMPESTRZ128,
27690 IX86_BUILTIN_PCMPISTRI128,
27691 IX86_BUILTIN_PCMPISTRM128,
27692 IX86_BUILTIN_PCMPISTRA128,
27693 IX86_BUILTIN_PCMPISTRC128,
27694 IX86_BUILTIN_PCMPISTRO128,
27695 IX86_BUILTIN_PCMPISTRS128,
27696 IX86_BUILTIN_PCMPISTRZ128,
27698 IX86_BUILTIN_PCMPGTQ,
27700 /* AES instructions */
27701 IX86_BUILTIN_AESENC128,
27702 IX86_BUILTIN_AESENCLAST128,
27703 IX86_BUILTIN_AESDEC128,
27704 IX86_BUILTIN_AESDECLAST128,
27705 IX86_BUILTIN_AESIMC128,
27706 IX86_BUILTIN_AESKEYGENASSIST128,
27708 /* PCLMUL instruction */
27709 IX86_BUILTIN_PCLMULQDQ128,
27711 /* AVX */
27712 IX86_BUILTIN_ADDPD256,
27713 IX86_BUILTIN_ADDPS256,
27714 IX86_BUILTIN_ADDSUBPD256,
27715 IX86_BUILTIN_ADDSUBPS256,
27716 IX86_BUILTIN_ANDPD256,
27717 IX86_BUILTIN_ANDPS256,
27718 IX86_BUILTIN_ANDNPD256,
27719 IX86_BUILTIN_ANDNPS256,
27720 IX86_BUILTIN_BLENDPD256,
27721 IX86_BUILTIN_BLENDPS256,
27722 IX86_BUILTIN_BLENDVPD256,
27723 IX86_BUILTIN_BLENDVPS256,
27724 IX86_BUILTIN_DIVPD256,
27725 IX86_BUILTIN_DIVPS256,
27726 IX86_BUILTIN_DPPS256,
27727 IX86_BUILTIN_HADDPD256,
27728 IX86_BUILTIN_HADDPS256,
27729 IX86_BUILTIN_HSUBPD256,
27730 IX86_BUILTIN_HSUBPS256,
27731 IX86_BUILTIN_MAXPD256,
27732 IX86_BUILTIN_MAXPS256,
27733 IX86_BUILTIN_MINPD256,
27734 IX86_BUILTIN_MINPS256,
27735 IX86_BUILTIN_MULPD256,
27736 IX86_BUILTIN_MULPS256,
27737 IX86_BUILTIN_ORPD256,
27738 IX86_BUILTIN_ORPS256,
27739 IX86_BUILTIN_SHUFPD256,
27740 IX86_BUILTIN_SHUFPS256,
27741 IX86_BUILTIN_SUBPD256,
27742 IX86_BUILTIN_SUBPS256,
27743 IX86_BUILTIN_XORPD256,
27744 IX86_BUILTIN_XORPS256,
27745 IX86_BUILTIN_CMPSD,
27746 IX86_BUILTIN_CMPSS,
27747 IX86_BUILTIN_CMPPD,
27748 IX86_BUILTIN_CMPPS,
27749 IX86_BUILTIN_CMPPD256,
27750 IX86_BUILTIN_CMPPS256,
27751 IX86_BUILTIN_CVTDQ2PD256,
27752 IX86_BUILTIN_CVTDQ2PS256,
27753 IX86_BUILTIN_CVTPD2PS256,
27754 IX86_BUILTIN_CVTPS2DQ256,
27755 IX86_BUILTIN_CVTPS2PD256,
27756 IX86_BUILTIN_CVTTPD2DQ256,
27757 IX86_BUILTIN_CVTPD2DQ256,
27758 IX86_BUILTIN_CVTTPS2DQ256,
27759 IX86_BUILTIN_EXTRACTF128PD256,
27760 IX86_BUILTIN_EXTRACTF128PS256,
27761 IX86_BUILTIN_EXTRACTF128SI256,
27762 IX86_BUILTIN_VZEROALL,
27763 IX86_BUILTIN_VZEROUPPER,
27764 IX86_BUILTIN_VPERMILVARPD,
27765 IX86_BUILTIN_VPERMILVARPS,
27766 IX86_BUILTIN_VPERMILVARPD256,
27767 IX86_BUILTIN_VPERMILVARPS256,
27768 IX86_BUILTIN_VPERMILPD,
27769 IX86_BUILTIN_VPERMILPS,
27770 IX86_BUILTIN_VPERMILPD256,
27771 IX86_BUILTIN_VPERMILPS256,
27772 IX86_BUILTIN_VPERMIL2PD,
27773 IX86_BUILTIN_VPERMIL2PS,
27774 IX86_BUILTIN_VPERMIL2PD256,
27775 IX86_BUILTIN_VPERMIL2PS256,
27776 IX86_BUILTIN_VPERM2F128PD256,
27777 IX86_BUILTIN_VPERM2F128PS256,
27778 IX86_BUILTIN_VPERM2F128SI256,
27779 IX86_BUILTIN_VBROADCASTSS,
27780 IX86_BUILTIN_VBROADCASTSD256,
27781 IX86_BUILTIN_VBROADCASTSS256,
27782 IX86_BUILTIN_VBROADCASTPD256,
27783 IX86_BUILTIN_VBROADCASTPS256,
27784 IX86_BUILTIN_VINSERTF128PD256,
27785 IX86_BUILTIN_VINSERTF128PS256,
27786 IX86_BUILTIN_VINSERTF128SI256,
27787 IX86_BUILTIN_LOADUPD256,
27788 IX86_BUILTIN_LOADUPS256,
27789 IX86_BUILTIN_STOREUPD256,
27790 IX86_BUILTIN_STOREUPS256,
27791 IX86_BUILTIN_LDDQU256,
27792 IX86_BUILTIN_MOVNTDQ256,
27793 IX86_BUILTIN_MOVNTPD256,
27794 IX86_BUILTIN_MOVNTPS256,
27795 IX86_BUILTIN_LOADDQU256,
27796 IX86_BUILTIN_STOREDQU256,
27797 IX86_BUILTIN_MASKLOADPD,
27798 IX86_BUILTIN_MASKLOADPS,
27799 IX86_BUILTIN_MASKSTOREPD,
27800 IX86_BUILTIN_MASKSTOREPS,
27801 IX86_BUILTIN_MASKLOADPD256,
27802 IX86_BUILTIN_MASKLOADPS256,
27803 IX86_BUILTIN_MASKSTOREPD256,
27804 IX86_BUILTIN_MASKSTOREPS256,
27805 IX86_BUILTIN_MOVSHDUP256,
27806 IX86_BUILTIN_MOVSLDUP256,
27807 IX86_BUILTIN_MOVDDUP256,
27809 IX86_BUILTIN_SQRTPD256,
27810 IX86_BUILTIN_SQRTPS256,
27811 IX86_BUILTIN_SQRTPS_NR256,
27812 IX86_BUILTIN_RSQRTPS256,
27813 IX86_BUILTIN_RSQRTPS_NR256,
27815 IX86_BUILTIN_RCPPS256,
27817 IX86_BUILTIN_ROUNDPD256,
27818 IX86_BUILTIN_ROUNDPS256,
27820 IX86_BUILTIN_FLOORPD256,
27821 IX86_BUILTIN_CEILPD256,
27822 IX86_BUILTIN_TRUNCPD256,
27823 IX86_BUILTIN_RINTPD256,
27824 IX86_BUILTIN_ROUNDPD_AZ256,
27826 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27827 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27828 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27830 IX86_BUILTIN_FLOORPS256,
27831 IX86_BUILTIN_CEILPS256,
27832 IX86_BUILTIN_TRUNCPS256,
27833 IX86_BUILTIN_RINTPS256,
27834 IX86_BUILTIN_ROUNDPS_AZ256,
27836 IX86_BUILTIN_FLOORPS_SFIX256,
27837 IX86_BUILTIN_CEILPS_SFIX256,
27838 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27840 IX86_BUILTIN_UNPCKHPD256,
27841 IX86_BUILTIN_UNPCKLPD256,
27842 IX86_BUILTIN_UNPCKHPS256,
27843 IX86_BUILTIN_UNPCKLPS256,
27845 IX86_BUILTIN_SI256_SI,
27846 IX86_BUILTIN_PS256_PS,
27847 IX86_BUILTIN_PD256_PD,
27848 IX86_BUILTIN_SI_SI256,
27849 IX86_BUILTIN_PS_PS256,
27850 IX86_BUILTIN_PD_PD256,
27852 IX86_BUILTIN_VTESTZPD,
27853 IX86_BUILTIN_VTESTCPD,
27854 IX86_BUILTIN_VTESTNZCPD,
27855 IX86_BUILTIN_VTESTZPS,
27856 IX86_BUILTIN_VTESTCPS,
27857 IX86_BUILTIN_VTESTNZCPS,
27858 IX86_BUILTIN_VTESTZPD256,
27859 IX86_BUILTIN_VTESTCPD256,
27860 IX86_BUILTIN_VTESTNZCPD256,
27861 IX86_BUILTIN_VTESTZPS256,
27862 IX86_BUILTIN_VTESTCPS256,
27863 IX86_BUILTIN_VTESTNZCPS256,
27864 IX86_BUILTIN_PTESTZ256,
27865 IX86_BUILTIN_PTESTC256,
27866 IX86_BUILTIN_PTESTNZC256,
27868 IX86_BUILTIN_MOVMSKPD256,
27869 IX86_BUILTIN_MOVMSKPS256,
27871 /* AVX2 */
27872 IX86_BUILTIN_MPSADBW256,
27873 IX86_BUILTIN_PABSB256,
27874 IX86_BUILTIN_PABSW256,
27875 IX86_BUILTIN_PABSD256,
27876 IX86_BUILTIN_PACKSSDW256,
27877 IX86_BUILTIN_PACKSSWB256,
27878 IX86_BUILTIN_PACKUSDW256,
27879 IX86_BUILTIN_PACKUSWB256,
27880 IX86_BUILTIN_PADDB256,
27881 IX86_BUILTIN_PADDW256,
27882 IX86_BUILTIN_PADDD256,
27883 IX86_BUILTIN_PADDQ256,
27884 IX86_BUILTIN_PADDSB256,
27885 IX86_BUILTIN_PADDSW256,
27886 IX86_BUILTIN_PADDUSB256,
27887 IX86_BUILTIN_PADDUSW256,
27888 IX86_BUILTIN_PALIGNR256,
27889 IX86_BUILTIN_AND256I,
27890 IX86_BUILTIN_ANDNOT256I,
27891 IX86_BUILTIN_PAVGB256,
27892 IX86_BUILTIN_PAVGW256,
27893 IX86_BUILTIN_PBLENDVB256,
27894 IX86_BUILTIN_PBLENDVW256,
27895 IX86_BUILTIN_PCMPEQB256,
27896 IX86_BUILTIN_PCMPEQW256,
27897 IX86_BUILTIN_PCMPEQD256,
27898 IX86_BUILTIN_PCMPEQQ256,
27899 IX86_BUILTIN_PCMPGTB256,
27900 IX86_BUILTIN_PCMPGTW256,
27901 IX86_BUILTIN_PCMPGTD256,
27902 IX86_BUILTIN_PCMPGTQ256,
27903 IX86_BUILTIN_PHADDW256,
27904 IX86_BUILTIN_PHADDD256,
27905 IX86_BUILTIN_PHADDSW256,
27906 IX86_BUILTIN_PHSUBW256,
27907 IX86_BUILTIN_PHSUBD256,
27908 IX86_BUILTIN_PHSUBSW256,
27909 IX86_BUILTIN_PMADDUBSW256,
27910 IX86_BUILTIN_PMADDWD256,
27911 IX86_BUILTIN_PMAXSB256,
27912 IX86_BUILTIN_PMAXSW256,
27913 IX86_BUILTIN_PMAXSD256,
27914 IX86_BUILTIN_PMAXUB256,
27915 IX86_BUILTIN_PMAXUW256,
27916 IX86_BUILTIN_PMAXUD256,
27917 IX86_BUILTIN_PMINSB256,
27918 IX86_BUILTIN_PMINSW256,
27919 IX86_BUILTIN_PMINSD256,
27920 IX86_BUILTIN_PMINUB256,
27921 IX86_BUILTIN_PMINUW256,
27922 IX86_BUILTIN_PMINUD256,
27923 IX86_BUILTIN_PMOVMSKB256,
27924 IX86_BUILTIN_PMOVSXBW256,
27925 IX86_BUILTIN_PMOVSXBD256,
27926 IX86_BUILTIN_PMOVSXBQ256,
27927 IX86_BUILTIN_PMOVSXWD256,
27928 IX86_BUILTIN_PMOVSXWQ256,
27929 IX86_BUILTIN_PMOVSXDQ256,
27930 IX86_BUILTIN_PMOVZXBW256,
27931 IX86_BUILTIN_PMOVZXBD256,
27932 IX86_BUILTIN_PMOVZXBQ256,
27933 IX86_BUILTIN_PMOVZXWD256,
27934 IX86_BUILTIN_PMOVZXWQ256,
27935 IX86_BUILTIN_PMOVZXDQ256,
27936 IX86_BUILTIN_PMULDQ256,
27937 IX86_BUILTIN_PMULHRSW256,
27938 IX86_BUILTIN_PMULHUW256,
27939 IX86_BUILTIN_PMULHW256,
27940 IX86_BUILTIN_PMULLW256,
27941 IX86_BUILTIN_PMULLD256,
27942 IX86_BUILTIN_PMULUDQ256,
27943 IX86_BUILTIN_POR256,
27944 IX86_BUILTIN_PSADBW256,
27945 IX86_BUILTIN_PSHUFB256,
27946 IX86_BUILTIN_PSHUFD256,
27947 IX86_BUILTIN_PSHUFHW256,
27948 IX86_BUILTIN_PSHUFLW256,
27949 IX86_BUILTIN_PSIGNB256,
27950 IX86_BUILTIN_PSIGNW256,
27951 IX86_BUILTIN_PSIGND256,
27952 IX86_BUILTIN_PSLLDQI256,
27953 IX86_BUILTIN_PSLLWI256,
27954 IX86_BUILTIN_PSLLW256,
27955 IX86_BUILTIN_PSLLDI256,
27956 IX86_BUILTIN_PSLLD256,
27957 IX86_BUILTIN_PSLLQI256,
27958 IX86_BUILTIN_PSLLQ256,
27959 IX86_BUILTIN_PSRAWI256,
27960 IX86_BUILTIN_PSRAW256,
27961 IX86_BUILTIN_PSRADI256,
27962 IX86_BUILTIN_PSRAD256,
27963 IX86_BUILTIN_PSRLDQI256,
27964 IX86_BUILTIN_PSRLWI256,
27965 IX86_BUILTIN_PSRLW256,
27966 IX86_BUILTIN_PSRLDI256,
27967 IX86_BUILTIN_PSRLD256,
27968 IX86_BUILTIN_PSRLQI256,
27969 IX86_BUILTIN_PSRLQ256,
27970 IX86_BUILTIN_PSUBB256,
27971 IX86_BUILTIN_PSUBW256,
27972 IX86_BUILTIN_PSUBD256,
27973 IX86_BUILTIN_PSUBQ256,
27974 IX86_BUILTIN_PSUBSB256,
27975 IX86_BUILTIN_PSUBSW256,
27976 IX86_BUILTIN_PSUBUSB256,
27977 IX86_BUILTIN_PSUBUSW256,
27978 IX86_BUILTIN_PUNPCKHBW256,
27979 IX86_BUILTIN_PUNPCKHWD256,
27980 IX86_BUILTIN_PUNPCKHDQ256,
27981 IX86_BUILTIN_PUNPCKHQDQ256,
27982 IX86_BUILTIN_PUNPCKLBW256,
27983 IX86_BUILTIN_PUNPCKLWD256,
27984 IX86_BUILTIN_PUNPCKLDQ256,
27985 IX86_BUILTIN_PUNPCKLQDQ256,
27986 IX86_BUILTIN_PXOR256,
27987 IX86_BUILTIN_MOVNTDQA256,
27988 IX86_BUILTIN_VBROADCASTSS_PS,
27989 IX86_BUILTIN_VBROADCASTSS_PS256,
27990 IX86_BUILTIN_VBROADCASTSD_PD256,
27991 IX86_BUILTIN_VBROADCASTSI256,
27992 IX86_BUILTIN_PBLENDD256,
27993 IX86_BUILTIN_PBLENDD128,
27994 IX86_BUILTIN_PBROADCASTB256,
27995 IX86_BUILTIN_PBROADCASTW256,
27996 IX86_BUILTIN_PBROADCASTD256,
27997 IX86_BUILTIN_PBROADCASTQ256,
27998 IX86_BUILTIN_PBROADCASTB128,
27999 IX86_BUILTIN_PBROADCASTW128,
28000 IX86_BUILTIN_PBROADCASTD128,
28001 IX86_BUILTIN_PBROADCASTQ128,
28002 IX86_BUILTIN_VPERMVARSI256,
28003 IX86_BUILTIN_VPERMDF256,
28004 IX86_BUILTIN_VPERMVARSF256,
28005 IX86_BUILTIN_VPERMDI256,
28006 IX86_BUILTIN_VPERMTI256,
28007 IX86_BUILTIN_VEXTRACT128I256,
28008 IX86_BUILTIN_VINSERT128I256,
28009 IX86_BUILTIN_MASKLOADD,
28010 IX86_BUILTIN_MASKLOADQ,
28011 IX86_BUILTIN_MASKLOADD256,
28012 IX86_BUILTIN_MASKLOADQ256,
28013 IX86_BUILTIN_MASKSTORED,
28014 IX86_BUILTIN_MASKSTOREQ,
28015 IX86_BUILTIN_MASKSTORED256,
28016 IX86_BUILTIN_MASKSTOREQ256,
28017 IX86_BUILTIN_PSLLVV4DI,
28018 IX86_BUILTIN_PSLLVV2DI,
28019 IX86_BUILTIN_PSLLVV8SI,
28020 IX86_BUILTIN_PSLLVV4SI,
28021 IX86_BUILTIN_PSRAVV8SI,
28022 IX86_BUILTIN_PSRAVV4SI,
28023 IX86_BUILTIN_PSRLVV4DI,
28024 IX86_BUILTIN_PSRLVV2DI,
28025 IX86_BUILTIN_PSRLVV8SI,
28026 IX86_BUILTIN_PSRLVV4SI,
28028 IX86_BUILTIN_GATHERSIV2DF,
28029 IX86_BUILTIN_GATHERSIV4DF,
28030 IX86_BUILTIN_GATHERDIV2DF,
28031 IX86_BUILTIN_GATHERDIV4DF,
28032 IX86_BUILTIN_GATHERSIV4SF,
28033 IX86_BUILTIN_GATHERSIV8SF,
28034 IX86_BUILTIN_GATHERDIV4SF,
28035 IX86_BUILTIN_GATHERDIV8SF,
28036 IX86_BUILTIN_GATHERSIV2DI,
28037 IX86_BUILTIN_GATHERSIV4DI,
28038 IX86_BUILTIN_GATHERDIV2DI,
28039 IX86_BUILTIN_GATHERDIV4DI,
28040 IX86_BUILTIN_GATHERSIV4SI,
28041 IX86_BUILTIN_GATHERSIV8SI,
28042 IX86_BUILTIN_GATHERDIV4SI,
28043 IX86_BUILTIN_GATHERDIV8SI,
28045 /* AVX512F */
28046 IX86_BUILTIN_ADDPD512,
28047 IX86_BUILTIN_ADDPS512,
28048 IX86_BUILTIN_ADDSD_ROUND,
28049 IX86_BUILTIN_ADDSS_ROUND,
28050 IX86_BUILTIN_ALIGND512,
28051 IX86_BUILTIN_ALIGNQ512,
28052 IX86_BUILTIN_BLENDMD512,
28053 IX86_BUILTIN_BLENDMPD512,
28054 IX86_BUILTIN_BLENDMPS512,
28055 IX86_BUILTIN_BLENDMQ512,
28056 IX86_BUILTIN_BROADCASTF32X4_512,
28057 IX86_BUILTIN_BROADCASTF64X4_512,
28058 IX86_BUILTIN_BROADCASTI32X4_512,
28059 IX86_BUILTIN_BROADCASTI64X4_512,
28060 IX86_BUILTIN_BROADCASTSD512,
28061 IX86_BUILTIN_BROADCASTSS512,
28062 IX86_BUILTIN_CMPD512,
28063 IX86_BUILTIN_CMPPD512,
28064 IX86_BUILTIN_CMPPS512,
28065 IX86_BUILTIN_CMPQ512,
28066 IX86_BUILTIN_CMPSD_MASK,
28067 IX86_BUILTIN_CMPSS_MASK,
28068 IX86_BUILTIN_COMIDF,
28069 IX86_BUILTIN_COMISF,
28070 IX86_BUILTIN_COMPRESSPD512,
28071 IX86_BUILTIN_COMPRESSPDSTORE512,
28072 IX86_BUILTIN_COMPRESSPS512,
28073 IX86_BUILTIN_COMPRESSPSSTORE512,
28074 IX86_BUILTIN_CVTDQ2PD512,
28075 IX86_BUILTIN_CVTDQ2PS512,
28076 IX86_BUILTIN_CVTPD2DQ512,
28077 IX86_BUILTIN_CVTPD2PS512,
28078 IX86_BUILTIN_CVTPD2UDQ512,
28079 IX86_BUILTIN_CVTPH2PS512,
28080 IX86_BUILTIN_CVTPS2DQ512,
28081 IX86_BUILTIN_CVTPS2PD512,
28082 IX86_BUILTIN_CVTPS2PH512,
28083 IX86_BUILTIN_CVTPS2UDQ512,
28084 IX86_BUILTIN_CVTSD2SS_ROUND,
28085 IX86_BUILTIN_CVTSI2SD64,
28086 IX86_BUILTIN_CVTSI2SS32,
28087 IX86_BUILTIN_CVTSI2SS64,
28088 IX86_BUILTIN_CVTSS2SD_ROUND,
28089 IX86_BUILTIN_CVTTPD2DQ512,
28090 IX86_BUILTIN_CVTTPD2UDQ512,
28091 IX86_BUILTIN_CVTTPS2DQ512,
28092 IX86_BUILTIN_CVTTPS2UDQ512,
28093 IX86_BUILTIN_CVTUDQ2PD512,
28094 IX86_BUILTIN_CVTUDQ2PS512,
28095 IX86_BUILTIN_CVTUSI2SD32,
28096 IX86_BUILTIN_CVTUSI2SD64,
28097 IX86_BUILTIN_CVTUSI2SS32,
28098 IX86_BUILTIN_CVTUSI2SS64,
28099 IX86_BUILTIN_DIVPD512,
28100 IX86_BUILTIN_DIVPS512,
28101 IX86_BUILTIN_DIVSD_ROUND,
28102 IX86_BUILTIN_DIVSS_ROUND,
28103 IX86_BUILTIN_EXPANDPD512,
28104 IX86_BUILTIN_EXPANDPD512Z,
28105 IX86_BUILTIN_EXPANDPDLOAD512,
28106 IX86_BUILTIN_EXPANDPDLOAD512Z,
28107 IX86_BUILTIN_EXPANDPS512,
28108 IX86_BUILTIN_EXPANDPS512Z,
28109 IX86_BUILTIN_EXPANDPSLOAD512,
28110 IX86_BUILTIN_EXPANDPSLOAD512Z,
28111 IX86_BUILTIN_EXTRACTF32X4,
28112 IX86_BUILTIN_EXTRACTF64X4,
28113 IX86_BUILTIN_EXTRACTI32X4,
28114 IX86_BUILTIN_EXTRACTI64X4,
28115 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28116 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28117 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28118 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28119 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28120 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28121 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28122 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28123 IX86_BUILTIN_GETEXPPD512,
28124 IX86_BUILTIN_GETEXPPS512,
28125 IX86_BUILTIN_GETEXPSD128,
28126 IX86_BUILTIN_GETEXPSS128,
28127 IX86_BUILTIN_GETMANTPD512,
28128 IX86_BUILTIN_GETMANTPS512,
28129 IX86_BUILTIN_GETMANTSD128,
28130 IX86_BUILTIN_GETMANTSS128,
28131 IX86_BUILTIN_INSERTF32X4,
28132 IX86_BUILTIN_INSERTF64X4,
28133 IX86_BUILTIN_INSERTI32X4,
28134 IX86_BUILTIN_INSERTI64X4,
28135 IX86_BUILTIN_LOADAPD512,
28136 IX86_BUILTIN_LOADAPS512,
28137 IX86_BUILTIN_LOADDQUDI512,
28138 IX86_BUILTIN_LOADDQUSI512,
28139 IX86_BUILTIN_LOADUPD512,
28140 IX86_BUILTIN_LOADUPS512,
28141 IX86_BUILTIN_MAXPD512,
28142 IX86_BUILTIN_MAXPS512,
28143 IX86_BUILTIN_MAXSD_ROUND,
28144 IX86_BUILTIN_MAXSS_ROUND,
28145 IX86_BUILTIN_MINPD512,
28146 IX86_BUILTIN_MINPS512,
28147 IX86_BUILTIN_MINSD_ROUND,
28148 IX86_BUILTIN_MINSS_ROUND,
28149 IX86_BUILTIN_MOVAPD512,
28150 IX86_BUILTIN_MOVAPS512,
28151 IX86_BUILTIN_MOVDDUP512,
28152 IX86_BUILTIN_MOVDQA32LOAD512,
28153 IX86_BUILTIN_MOVDQA32STORE512,
28154 IX86_BUILTIN_MOVDQA32_512,
28155 IX86_BUILTIN_MOVDQA64LOAD512,
28156 IX86_BUILTIN_MOVDQA64STORE512,
28157 IX86_BUILTIN_MOVDQA64_512,
28158 IX86_BUILTIN_MOVNTDQ512,
28159 IX86_BUILTIN_MOVNTDQA512,
28160 IX86_BUILTIN_MOVNTPD512,
28161 IX86_BUILTIN_MOVNTPS512,
28162 IX86_BUILTIN_MOVSHDUP512,
28163 IX86_BUILTIN_MOVSLDUP512,
28164 IX86_BUILTIN_MULPD512,
28165 IX86_BUILTIN_MULPS512,
28166 IX86_BUILTIN_MULSD_ROUND,
28167 IX86_BUILTIN_MULSS_ROUND,
28168 IX86_BUILTIN_PABSD512,
28169 IX86_BUILTIN_PABSQ512,
28170 IX86_BUILTIN_PADDD512,
28171 IX86_BUILTIN_PADDQ512,
28172 IX86_BUILTIN_PANDD512,
28173 IX86_BUILTIN_PANDND512,
28174 IX86_BUILTIN_PANDNQ512,
28175 IX86_BUILTIN_PANDQ512,
28176 IX86_BUILTIN_PBROADCASTD512,
28177 IX86_BUILTIN_PBROADCASTD512_GPR,
28178 IX86_BUILTIN_PBROADCASTMB512,
28179 IX86_BUILTIN_PBROADCASTMW512,
28180 IX86_BUILTIN_PBROADCASTQ512,
28181 IX86_BUILTIN_PBROADCASTQ512_GPR,
28182 IX86_BUILTIN_PBROADCASTQ512_MEM,
28183 IX86_BUILTIN_PCMPEQD512_MASK,
28184 IX86_BUILTIN_PCMPEQQ512_MASK,
28185 IX86_BUILTIN_PCMPGTD512_MASK,
28186 IX86_BUILTIN_PCMPGTQ512_MASK,
28187 IX86_BUILTIN_PCOMPRESSD512,
28188 IX86_BUILTIN_PCOMPRESSDSTORE512,
28189 IX86_BUILTIN_PCOMPRESSQ512,
28190 IX86_BUILTIN_PCOMPRESSQSTORE512,
28191 IX86_BUILTIN_PEXPANDD512,
28192 IX86_BUILTIN_PEXPANDD512Z,
28193 IX86_BUILTIN_PEXPANDDLOAD512,
28194 IX86_BUILTIN_PEXPANDDLOAD512Z,
28195 IX86_BUILTIN_PEXPANDQ512,
28196 IX86_BUILTIN_PEXPANDQ512Z,
28197 IX86_BUILTIN_PEXPANDQLOAD512,
28198 IX86_BUILTIN_PEXPANDQLOAD512Z,
28199 IX86_BUILTIN_PMAXSD512,
28200 IX86_BUILTIN_PMAXSQ512,
28201 IX86_BUILTIN_PMAXUD512,
28202 IX86_BUILTIN_PMAXUQ512,
28203 IX86_BUILTIN_PMINSD512,
28204 IX86_BUILTIN_PMINSQ512,
28205 IX86_BUILTIN_PMINUD512,
28206 IX86_BUILTIN_PMINUQ512,
28207 IX86_BUILTIN_PMOVDB512,
28208 IX86_BUILTIN_PMOVDB512_MEM,
28209 IX86_BUILTIN_PMOVDW512,
28210 IX86_BUILTIN_PMOVDW512_MEM,
28211 IX86_BUILTIN_PMOVQB512,
28212 IX86_BUILTIN_PMOVQB512_MEM,
28213 IX86_BUILTIN_PMOVQD512,
28214 IX86_BUILTIN_PMOVQD512_MEM,
28215 IX86_BUILTIN_PMOVQW512,
28216 IX86_BUILTIN_PMOVQW512_MEM,
28217 IX86_BUILTIN_PMOVSDB512,
28218 IX86_BUILTIN_PMOVSDB512_MEM,
28219 IX86_BUILTIN_PMOVSDW512,
28220 IX86_BUILTIN_PMOVSDW512_MEM,
28221 IX86_BUILTIN_PMOVSQB512,
28222 IX86_BUILTIN_PMOVSQB512_MEM,
28223 IX86_BUILTIN_PMOVSQD512,
28224 IX86_BUILTIN_PMOVSQD512_MEM,
28225 IX86_BUILTIN_PMOVSQW512,
28226 IX86_BUILTIN_PMOVSQW512_MEM,
28227 IX86_BUILTIN_PMOVSXBD512,
28228 IX86_BUILTIN_PMOVSXBQ512,
28229 IX86_BUILTIN_PMOVSXDQ512,
28230 IX86_BUILTIN_PMOVSXWD512,
28231 IX86_BUILTIN_PMOVSXWQ512,
28232 IX86_BUILTIN_PMOVUSDB512,
28233 IX86_BUILTIN_PMOVUSDB512_MEM,
28234 IX86_BUILTIN_PMOVUSDW512,
28235 IX86_BUILTIN_PMOVUSDW512_MEM,
28236 IX86_BUILTIN_PMOVUSQB512,
28237 IX86_BUILTIN_PMOVUSQB512_MEM,
28238 IX86_BUILTIN_PMOVUSQD512,
28239 IX86_BUILTIN_PMOVUSQD512_MEM,
28240 IX86_BUILTIN_PMOVUSQW512,
28241 IX86_BUILTIN_PMOVUSQW512_MEM,
28242 IX86_BUILTIN_PMOVZXBD512,
28243 IX86_BUILTIN_PMOVZXBQ512,
28244 IX86_BUILTIN_PMOVZXDQ512,
28245 IX86_BUILTIN_PMOVZXWD512,
28246 IX86_BUILTIN_PMOVZXWQ512,
28247 IX86_BUILTIN_PMULDQ512,
28248 IX86_BUILTIN_PMULLD512,
28249 IX86_BUILTIN_PMULUDQ512,
28250 IX86_BUILTIN_PORD512,
28251 IX86_BUILTIN_PORQ512,
28252 IX86_BUILTIN_PROLD512,
28253 IX86_BUILTIN_PROLQ512,
28254 IX86_BUILTIN_PROLVD512,
28255 IX86_BUILTIN_PROLVQ512,
28256 IX86_BUILTIN_PRORD512,
28257 IX86_BUILTIN_PRORQ512,
28258 IX86_BUILTIN_PRORVD512,
28259 IX86_BUILTIN_PRORVQ512,
28260 IX86_BUILTIN_PSHUFD512,
28261 IX86_BUILTIN_PSLLD512,
28262 IX86_BUILTIN_PSLLDI512,
28263 IX86_BUILTIN_PSLLQ512,
28264 IX86_BUILTIN_PSLLQI512,
28265 IX86_BUILTIN_PSLLVV16SI,
28266 IX86_BUILTIN_PSLLVV8DI,
28267 IX86_BUILTIN_PSRAD512,
28268 IX86_BUILTIN_PSRADI512,
28269 IX86_BUILTIN_PSRAQ512,
28270 IX86_BUILTIN_PSRAQI512,
28271 IX86_BUILTIN_PSRAVV16SI,
28272 IX86_BUILTIN_PSRAVV8DI,
28273 IX86_BUILTIN_PSRLD512,
28274 IX86_BUILTIN_PSRLDI512,
28275 IX86_BUILTIN_PSRLQ512,
28276 IX86_BUILTIN_PSRLQI512,
28277 IX86_BUILTIN_PSRLVV16SI,
28278 IX86_BUILTIN_PSRLVV8DI,
28279 IX86_BUILTIN_PSUBD512,
28280 IX86_BUILTIN_PSUBQ512,
28281 IX86_BUILTIN_PTESTMD512,
28282 IX86_BUILTIN_PTESTMQ512,
28283 IX86_BUILTIN_PTESTNMD512,
28284 IX86_BUILTIN_PTESTNMQ512,
28285 IX86_BUILTIN_PUNPCKHDQ512,
28286 IX86_BUILTIN_PUNPCKHQDQ512,
28287 IX86_BUILTIN_PUNPCKLDQ512,
28288 IX86_BUILTIN_PUNPCKLQDQ512,
28289 IX86_BUILTIN_PXORD512,
28290 IX86_BUILTIN_PXORQ512,
28291 IX86_BUILTIN_RCP14PD512,
28292 IX86_BUILTIN_RCP14PS512,
28293 IX86_BUILTIN_RCP14SD,
28294 IX86_BUILTIN_RCP14SS,
28295 IX86_BUILTIN_RNDSCALEPD,
28296 IX86_BUILTIN_RNDSCALEPS,
28297 IX86_BUILTIN_RNDSCALESD,
28298 IX86_BUILTIN_RNDSCALESS,
28299 IX86_BUILTIN_RSQRT14PD512,
28300 IX86_BUILTIN_RSQRT14PS512,
28301 IX86_BUILTIN_RSQRT14SD,
28302 IX86_BUILTIN_RSQRT14SS,
28303 IX86_BUILTIN_SCALEFPD512,
28304 IX86_BUILTIN_SCALEFPS512,
28305 IX86_BUILTIN_SCALEFSD,
28306 IX86_BUILTIN_SCALEFSS,
28307 IX86_BUILTIN_SHUFPD512,
28308 IX86_BUILTIN_SHUFPS512,
28309 IX86_BUILTIN_SHUF_F32x4,
28310 IX86_BUILTIN_SHUF_F64x2,
28311 IX86_BUILTIN_SHUF_I32x4,
28312 IX86_BUILTIN_SHUF_I64x2,
28313 IX86_BUILTIN_SQRTPD512,
28314 IX86_BUILTIN_SQRTPD512_MASK,
28315 IX86_BUILTIN_SQRTPS512_MASK,
28316 IX86_BUILTIN_SQRTPS_NR512,
28317 IX86_BUILTIN_SQRTSD_ROUND,
28318 IX86_BUILTIN_SQRTSS_ROUND,
28319 IX86_BUILTIN_STOREAPD512,
28320 IX86_BUILTIN_STOREAPS512,
28321 IX86_BUILTIN_STOREDQUDI512,
28322 IX86_BUILTIN_STOREDQUSI512,
28323 IX86_BUILTIN_STOREUPD512,
28324 IX86_BUILTIN_STOREUPS512,
28325 IX86_BUILTIN_SUBPD512,
28326 IX86_BUILTIN_SUBPS512,
28327 IX86_BUILTIN_SUBSD_ROUND,
28328 IX86_BUILTIN_SUBSS_ROUND,
28329 IX86_BUILTIN_UCMPD512,
28330 IX86_BUILTIN_UCMPQ512,
28331 IX86_BUILTIN_UNPCKHPD512,
28332 IX86_BUILTIN_UNPCKHPS512,
28333 IX86_BUILTIN_UNPCKLPD512,
28334 IX86_BUILTIN_UNPCKLPS512,
28335 IX86_BUILTIN_VCVTSD2SI32,
28336 IX86_BUILTIN_VCVTSD2SI64,
28337 IX86_BUILTIN_VCVTSD2USI32,
28338 IX86_BUILTIN_VCVTSD2USI64,
28339 IX86_BUILTIN_VCVTSS2SI32,
28340 IX86_BUILTIN_VCVTSS2SI64,
28341 IX86_BUILTIN_VCVTSS2USI32,
28342 IX86_BUILTIN_VCVTSS2USI64,
28343 IX86_BUILTIN_VCVTTSD2SI32,
28344 IX86_BUILTIN_VCVTTSD2SI64,
28345 IX86_BUILTIN_VCVTTSD2USI32,
28346 IX86_BUILTIN_VCVTTSD2USI64,
28347 IX86_BUILTIN_VCVTTSS2SI32,
28348 IX86_BUILTIN_VCVTTSS2SI64,
28349 IX86_BUILTIN_VCVTTSS2USI32,
28350 IX86_BUILTIN_VCVTTSS2USI64,
28351 IX86_BUILTIN_VFMADDPD512_MASK,
28352 IX86_BUILTIN_VFMADDPD512_MASK3,
28353 IX86_BUILTIN_VFMADDPD512_MASKZ,
28354 IX86_BUILTIN_VFMADDPS512_MASK,
28355 IX86_BUILTIN_VFMADDPS512_MASK3,
28356 IX86_BUILTIN_VFMADDPS512_MASKZ,
28357 IX86_BUILTIN_VFMADDSD3_ROUND,
28358 IX86_BUILTIN_VFMADDSS3_ROUND,
28359 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28360 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28361 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28362 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28363 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28364 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28365 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28366 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28367 IX86_BUILTIN_VFMSUBPD512_MASK3,
28368 IX86_BUILTIN_VFMSUBPS512_MASK3,
28369 IX86_BUILTIN_VFMSUBSD3_MASK3,
28370 IX86_BUILTIN_VFMSUBSS3_MASK3,
28371 IX86_BUILTIN_VFNMADDPD512_MASK,
28372 IX86_BUILTIN_VFNMADDPS512_MASK,
28373 IX86_BUILTIN_VFNMSUBPD512_MASK,
28374 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28375 IX86_BUILTIN_VFNMSUBPS512_MASK,
28376 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28377 IX86_BUILTIN_VPCLZCNTD512,
28378 IX86_BUILTIN_VPCLZCNTQ512,
28379 IX86_BUILTIN_VPCONFLICTD512,
28380 IX86_BUILTIN_VPCONFLICTQ512,
28381 IX86_BUILTIN_VPERMDF512,
28382 IX86_BUILTIN_VPERMDI512,
28383 IX86_BUILTIN_VPERMI2VARD512,
28384 IX86_BUILTIN_VPERMI2VARPD512,
28385 IX86_BUILTIN_VPERMI2VARPS512,
28386 IX86_BUILTIN_VPERMI2VARQ512,
28387 IX86_BUILTIN_VPERMILPD512,
28388 IX86_BUILTIN_VPERMILPS512,
28389 IX86_BUILTIN_VPERMILVARPD512,
28390 IX86_BUILTIN_VPERMILVARPS512,
28391 IX86_BUILTIN_VPERMT2VARD512,
28392 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28393 IX86_BUILTIN_VPERMT2VARPD512,
28394 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28395 IX86_BUILTIN_VPERMT2VARPS512,
28396 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28397 IX86_BUILTIN_VPERMT2VARQ512,
28398 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28399 IX86_BUILTIN_VPERMVARDF512,
28400 IX86_BUILTIN_VPERMVARDI512,
28401 IX86_BUILTIN_VPERMVARSF512,
28402 IX86_BUILTIN_VPERMVARSI512,
28403 IX86_BUILTIN_VTERNLOGD512_MASK,
28404 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28405 IX86_BUILTIN_VTERNLOGQ512_MASK,
28406 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28408 /* Mask arithmetic operations */
28409 IX86_BUILTIN_KAND16,
28410 IX86_BUILTIN_KANDN16,
28411 IX86_BUILTIN_KNOT16,
28412 IX86_BUILTIN_KOR16,
28413 IX86_BUILTIN_KORTESTC16,
28414 IX86_BUILTIN_KORTESTZ16,
28415 IX86_BUILTIN_KUNPCKBW,
28416 IX86_BUILTIN_KXNOR16,
28417 IX86_BUILTIN_KXOR16,
28418 IX86_BUILTIN_KMOV16,
28420 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28421 where all operands are 32 bytes or 64 bytes wide respectively. */
28422 IX86_BUILTIN_GATHERALTSIV4DF,
28423 IX86_BUILTIN_GATHERALTDIV8SF,
28424 IX86_BUILTIN_GATHERALTSIV4DI,
28425 IX86_BUILTIN_GATHERALTDIV8SI,
28426 IX86_BUILTIN_GATHER3ALTDIV16SF,
28427 IX86_BUILTIN_GATHER3ALTDIV16SI,
28428 IX86_BUILTIN_GATHER3ALTSIV8DF,
28429 IX86_BUILTIN_GATHER3ALTSIV8DI,
28430 IX86_BUILTIN_GATHER3DIV16SF,
28431 IX86_BUILTIN_GATHER3DIV16SI,
28432 IX86_BUILTIN_GATHER3DIV8DF,
28433 IX86_BUILTIN_GATHER3DIV8DI,
28434 IX86_BUILTIN_GATHER3SIV16SF,
28435 IX86_BUILTIN_GATHER3SIV16SI,
28436 IX86_BUILTIN_GATHER3SIV8DF,
28437 IX86_BUILTIN_GATHER3SIV8DI,
28438 IX86_BUILTIN_SCATTERDIV16SF,
28439 IX86_BUILTIN_SCATTERDIV16SI,
28440 IX86_BUILTIN_SCATTERDIV8DF,
28441 IX86_BUILTIN_SCATTERDIV8DI,
28442 IX86_BUILTIN_SCATTERSIV16SF,
28443 IX86_BUILTIN_SCATTERSIV16SI,
28444 IX86_BUILTIN_SCATTERSIV8DF,
28445 IX86_BUILTIN_SCATTERSIV8DI,
28447 /* AVX512PF */
28448 IX86_BUILTIN_GATHERPFQPD,
28449 IX86_BUILTIN_GATHERPFDPS,
28450 IX86_BUILTIN_GATHERPFDPD,
28451 IX86_BUILTIN_GATHERPFQPS,
28452 IX86_BUILTIN_SCATTERPFDPD,
28453 IX86_BUILTIN_SCATTERPFDPS,
28454 IX86_BUILTIN_SCATTERPFQPD,
28455 IX86_BUILTIN_SCATTERPFQPS,
28457 /* AVX-512ER */
28458 IX86_BUILTIN_EXP2PD_MASK,
28459 IX86_BUILTIN_EXP2PS_MASK,
28460 IX86_BUILTIN_EXP2PS,
28461 IX86_BUILTIN_RCP28PD,
28462 IX86_BUILTIN_RCP28PS,
28463 IX86_BUILTIN_RCP28SD,
28464 IX86_BUILTIN_RCP28SS,
28465 IX86_BUILTIN_RSQRT28PD,
28466 IX86_BUILTIN_RSQRT28PS,
28467 IX86_BUILTIN_RSQRT28SD,
28468 IX86_BUILTIN_RSQRT28SS,
28470 /* SHA builtins. */
28471 IX86_BUILTIN_SHA1MSG1,
28472 IX86_BUILTIN_SHA1MSG2,
28473 IX86_BUILTIN_SHA1NEXTE,
28474 IX86_BUILTIN_SHA1RNDS4,
28475 IX86_BUILTIN_SHA256MSG1,
28476 IX86_BUILTIN_SHA256MSG2,
28477 IX86_BUILTIN_SHA256RNDS2,
28479 /* TFmode support builtins. */
28480 IX86_BUILTIN_INFQ,
28481 IX86_BUILTIN_HUGE_VALQ,
28482 IX86_BUILTIN_FABSQ,
28483 IX86_BUILTIN_COPYSIGNQ,
28485 /* Vectorizer support builtins. */
28486 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28487 IX86_BUILTIN_CPYSGNPS,
28488 IX86_BUILTIN_CPYSGNPD,
28489 IX86_BUILTIN_CPYSGNPS256,
28490 IX86_BUILTIN_CPYSGNPS512,
28491 IX86_BUILTIN_CPYSGNPD256,
28492 IX86_BUILTIN_CPYSGNPD512,
28493 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28494 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28497 /* FMA4 instructions. */
28498 IX86_BUILTIN_VFMADDSS,
28499 IX86_BUILTIN_VFMADDSD,
28500 IX86_BUILTIN_VFMADDPS,
28501 IX86_BUILTIN_VFMADDPD,
28502 IX86_BUILTIN_VFMADDPS256,
28503 IX86_BUILTIN_VFMADDPD256,
28504 IX86_BUILTIN_VFMADDSUBPS,
28505 IX86_BUILTIN_VFMADDSUBPD,
28506 IX86_BUILTIN_VFMADDSUBPS256,
28507 IX86_BUILTIN_VFMADDSUBPD256,
28509 /* FMA3 instructions. */
28510 IX86_BUILTIN_VFMADDSS3,
28511 IX86_BUILTIN_VFMADDSD3,
28513 /* XOP instructions. */
28514 IX86_BUILTIN_VPCMOV,
28515 IX86_BUILTIN_VPCMOV_V2DI,
28516 IX86_BUILTIN_VPCMOV_V4SI,
28517 IX86_BUILTIN_VPCMOV_V8HI,
28518 IX86_BUILTIN_VPCMOV_V16QI,
28519 IX86_BUILTIN_VPCMOV_V4SF,
28520 IX86_BUILTIN_VPCMOV_V2DF,
28521 IX86_BUILTIN_VPCMOV256,
28522 IX86_BUILTIN_VPCMOV_V4DI256,
28523 IX86_BUILTIN_VPCMOV_V8SI256,
28524 IX86_BUILTIN_VPCMOV_V16HI256,
28525 IX86_BUILTIN_VPCMOV_V32QI256,
28526 IX86_BUILTIN_VPCMOV_V8SF256,
28527 IX86_BUILTIN_VPCMOV_V4DF256,
28529 IX86_BUILTIN_VPPERM,
28531 IX86_BUILTIN_VPMACSSWW,
28532 IX86_BUILTIN_VPMACSWW,
28533 IX86_BUILTIN_VPMACSSWD,
28534 IX86_BUILTIN_VPMACSWD,
28535 IX86_BUILTIN_VPMACSSDD,
28536 IX86_BUILTIN_VPMACSDD,
28537 IX86_BUILTIN_VPMACSSDQL,
28538 IX86_BUILTIN_VPMACSSDQH,
28539 IX86_BUILTIN_VPMACSDQL,
28540 IX86_BUILTIN_VPMACSDQH,
28541 IX86_BUILTIN_VPMADCSSWD,
28542 IX86_BUILTIN_VPMADCSWD,
28544 IX86_BUILTIN_VPHADDBW,
28545 IX86_BUILTIN_VPHADDBD,
28546 IX86_BUILTIN_VPHADDBQ,
28547 IX86_BUILTIN_VPHADDWD,
28548 IX86_BUILTIN_VPHADDWQ,
28549 IX86_BUILTIN_VPHADDDQ,
28550 IX86_BUILTIN_VPHADDUBW,
28551 IX86_BUILTIN_VPHADDUBD,
28552 IX86_BUILTIN_VPHADDUBQ,
28553 IX86_BUILTIN_VPHADDUWD,
28554 IX86_BUILTIN_VPHADDUWQ,
28555 IX86_BUILTIN_VPHADDUDQ,
28556 IX86_BUILTIN_VPHSUBBW,
28557 IX86_BUILTIN_VPHSUBWD,
28558 IX86_BUILTIN_VPHSUBDQ,
28560 IX86_BUILTIN_VPROTB,
28561 IX86_BUILTIN_VPROTW,
28562 IX86_BUILTIN_VPROTD,
28563 IX86_BUILTIN_VPROTQ,
28564 IX86_BUILTIN_VPROTB_IMM,
28565 IX86_BUILTIN_VPROTW_IMM,
28566 IX86_BUILTIN_VPROTD_IMM,
28567 IX86_BUILTIN_VPROTQ_IMM,
28569 IX86_BUILTIN_VPSHLB,
28570 IX86_BUILTIN_VPSHLW,
28571 IX86_BUILTIN_VPSHLD,
28572 IX86_BUILTIN_VPSHLQ,
28573 IX86_BUILTIN_VPSHAB,
28574 IX86_BUILTIN_VPSHAW,
28575 IX86_BUILTIN_VPSHAD,
28576 IX86_BUILTIN_VPSHAQ,
28578 IX86_BUILTIN_VFRCZSS,
28579 IX86_BUILTIN_VFRCZSD,
28580 IX86_BUILTIN_VFRCZPS,
28581 IX86_BUILTIN_VFRCZPD,
28582 IX86_BUILTIN_VFRCZPS256,
28583 IX86_BUILTIN_VFRCZPD256,
28585 IX86_BUILTIN_VPCOMEQUB,
28586 IX86_BUILTIN_VPCOMNEUB,
28587 IX86_BUILTIN_VPCOMLTUB,
28588 IX86_BUILTIN_VPCOMLEUB,
28589 IX86_BUILTIN_VPCOMGTUB,
28590 IX86_BUILTIN_VPCOMGEUB,
28591 IX86_BUILTIN_VPCOMFALSEUB,
28592 IX86_BUILTIN_VPCOMTRUEUB,
28594 IX86_BUILTIN_VPCOMEQUW,
28595 IX86_BUILTIN_VPCOMNEUW,
28596 IX86_BUILTIN_VPCOMLTUW,
28597 IX86_BUILTIN_VPCOMLEUW,
28598 IX86_BUILTIN_VPCOMGTUW,
28599 IX86_BUILTIN_VPCOMGEUW,
28600 IX86_BUILTIN_VPCOMFALSEUW,
28601 IX86_BUILTIN_VPCOMTRUEUW,
28603 IX86_BUILTIN_VPCOMEQUD,
28604 IX86_BUILTIN_VPCOMNEUD,
28605 IX86_BUILTIN_VPCOMLTUD,
28606 IX86_BUILTIN_VPCOMLEUD,
28607 IX86_BUILTIN_VPCOMGTUD,
28608 IX86_BUILTIN_VPCOMGEUD,
28609 IX86_BUILTIN_VPCOMFALSEUD,
28610 IX86_BUILTIN_VPCOMTRUEUD,
28612 IX86_BUILTIN_VPCOMEQUQ,
28613 IX86_BUILTIN_VPCOMNEUQ,
28614 IX86_BUILTIN_VPCOMLTUQ,
28615 IX86_BUILTIN_VPCOMLEUQ,
28616 IX86_BUILTIN_VPCOMGTUQ,
28617 IX86_BUILTIN_VPCOMGEUQ,
28618 IX86_BUILTIN_VPCOMFALSEUQ,
28619 IX86_BUILTIN_VPCOMTRUEUQ,
28621 IX86_BUILTIN_VPCOMEQB,
28622 IX86_BUILTIN_VPCOMNEB,
28623 IX86_BUILTIN_VPCOMLTB,
28624 IX86_BUILTIN_VPCOMLEB,
28625 IX86_BUILTIN_VPCOMGTB,
28626 IX86_BUILTIN_VPCOMGEB,
28627 IX86_BUILTIN_VPCOMFALSEB,
28628 IX86_BUILTIN_VPCOMTRUEB,
28630 IX86_BUILTIN_VPCOMEQW,
28631 IX86_BUILTIN_VPCOMNEW,
28632 IX86_BUILTIN_VPCOMLTW,
28633 IX86_BUILTIN_VPCOMLEW,
28634 IX86_BUILTIN_VPCOMGTW,
28635 IX86_BUILTIN_VPCOMGEW,
28636 IX86_BUILTIN_VPCOMFALSEW,
28637 IX86_BUILTIN_VPCOMTRUEW,
28639 IX86_BUILTIN_VPCOMEQD,
28640 IX86_BUILTIN_VPCOMNED,
28641 IX86_BUILTIN_VPCOMLTD,
28642 IX86_BUILTIN_VPCOMLED,
28643 IX86_BUILTIN_VPCOMGTD,
28644 IX86_BUILTIN_VPCOMGED,
28645 IX86_BUILTIN_VPCOMFALSED,
28646 IX86_BUILTIN_VPCOMTRUED,
28648 IX86_BUILTIN_VPCOMEQQ,
28649 IX86_BUILTIN_VPCOMNEQ,
28650 IX86_BUILTIN_VPCOMLTQ,
28651 IX86_BUILTIN_VPCOMLEQ,
28652 IX86_BUILTIN_VPCOMGTQ,
28653 IX86_BUILTIN_VPCOMGEQ,
28654 IX86_BUILTIN_VPCOMFALSEQ,
28655 IX86_BUILTIN_VPCOMTRUEQ,
28657 /* LWP instructions. */
28658 IX86_BUILTIN_LLWPCB,
28659 IX86_BUILTIN_SLWPCB,
28660 IX86_BUILTIN_LWPVAL32,
28661 IX86_BUILTIN_LWPVAL64,
28662 IX86_BUILTIN_LWPINS32,
28663 IX86_BUILTIN_LWPINS64,
28665 IX86_BUILTIN_CLZS,
28667 /* RTM */
28668 IX86_BUILTIN_XBEGIN,
28669 IX86_BUILTIN_XEND,
28670 IX86_BUILTIN_XABORT,
28671 IX86_BUILTIN_XTEST,
28673 /* BMI instructions. */
28674 IX86_BUILTIN_BEXTR32,
28675 IX86_BUILTIN_BEXTR64,
28676 IX86_BUILTIN_CTZS,
28678 /* TBM instructions. */
28679 IX86_BUILTIN_BEXTRI32,
28680 IX86_BUILTIN_BEXTRI64,
28682 /* BMI2 instructions. */
28683 IX86_BUILTIN_BZHI32,
28684 IX86_BUILTIN_BZHI64,
28685 IX86_BUILTIN_PDEP32,
28686 IX86_BUILTIN_PDEP64,
28687 IX86_BUILTIN_PEXT32,
28688 IX86_BUILTIN_PEXT64,
28690 /* ADX instructions. */
28691 IX86_BUILTIN_ADDCARRYX32,
28692 IX86_BUILTIN_ADDCARRYX64,
28694 /* FSGSBASE instructions. */
28695 IX86_BUILTIN_RDFSBASE32,
28696 IX86_BUILTIN_RDFSBASE64,
28697 IX86_BUILTIN_RDGSBASE32,
28698 IX86_BUILTIN_RDGSBASE64,
28699 IX86_BUILTIN_WRFSBASE32,
28700 IX86_BUILTIN_WRFSBASE64,
28701 IX86_BUILTIN_WRGSBASE32,
28702 IX86_BUILTIN_WRGSBASE64,
28704 /* RDRND instructions. */
28705 IX86_BUILTIN_RDRAND16_STEP,
28706 IX86_BUILTIN_RDRAND32_STEP,
28707 IX86_BUILTIN_RDRAND64_STEP,
28709 /* RDSEED instructions. */
28710 IX86_BUILTIN_RDSEED16_STEP,
28711 IX86_BUILTIN_RDSEED32_STEP,
28712 IX86_BUILTIN_RDSEED64_STEP,
28714 /* F16C instructions. */
28715 IX86_BUILTIN_CVTPH2PS,
28716 IX86_BUILTIN_CVTPH2PS256,
28717 IX86_BUILTIN_CVTPS2PH,
28718 IX86_BUILTIN_CVTPS2PH256,
28720 /* CFString built-in for darwin */
28721 IX86_BUILTIN_CFSTRING,
28723 /* Builtins to get CPU type and supported features. */
28724 IX86_BUILTIN_CPU_INIT,
28725 IX86_BUILTIN_CPU_IS,
28726 IX86_BUILTIN_CPU_SUPPORTS,
28728 /* Read/write FLAGS register built-ins. */
28729 IX86_BUILTIN_READ_FLAGS,
28730 IX86_BUILTIN_WRITE_FLAGS,
28732   IX86_BUILTIN_MAX
28733 };

28735 /* Table for the ix86 builtin decls. */
28736 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28738 /* Table of all of the builtin functions that are possible with different ISAs
28739    but are waiting to be built until a function is declared to use that
28740    ISA.  */
28741 struct builtin_isa {
28742   const char *name;                   /* function name */
28743   enum ix86_builtin_func_type tcode;  /* type to use in the declaration */
28744   HOST_WIDE_INT isa;                  /* isa_flags this builtin is defined for */
28745   bool const_p;                       /* true if the declaration is constant */
28746   bool set_and_not_built_p;           /* true while the builtin is recorded but its decl not yet built */
28747 };

28749 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28752 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
28753    MASK of isa_flags the builtin requires in the ix86_builtins_isa array.
28754    Store the function decl in the ix86_builtins array.  Return the function
28755    decl, or NULL_TREE if the builtin was not added.

28757    If the front end has a special hook for builtin functions, delay adding
28758    builtin functions that aren't in the current ISA until the ISA is changed
28759    with function-specific optimization.  Doing so can save about 300K for the
28760    default compiler.  When the builtin is expanded, check at that time whether
28761    it is valid.

28763    If the front end doesn't have a special hook, record all builtins, even
28764    those whose instruction set isn't in the current ISA, in case the user
28765    uses function-specific options for a different ISA; that way we don't get
28766    scope errors if a builtin is added in the middle of a function scope. */
28768 static inline tree
28769 def_builtin (HOST_WIDE_INT mask, const char *name,
28770              enum ix86_builtin_func_type tcode,
28771              enum ix86_builtins code)
28772 {
28773   tree decl = NULL_TREE;

28775   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28776     {
28777       ix86_builtins_isa[(int) code].isa = mask;

28779       mask &= ~OPTION_MASK_ISA_64BIT;
28780       if (mask == 0
28781           || (mask & ix86_isa_flags) != 0
28782           || (lang_hooks.builtin_function
28783               == lang_hooks.builtin_function_ext_scope))
28785         {
28786           tree type = ix86_get_builtin_func_type (tcode);
28787           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28788                                        NULL, NULL_TREE);
28789           ix86_builtins[(int) code] = decl;
28790           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28791         }
28792       else
28793         {
28794           ix86_builtins[(int) code] = NULL_TREE;
28795           ix86_builtins_isa[(int) code].tcode = tcode;
28796           ix86_builtins_isa[(int) code].name = name;
28797           ix86_builtins_isa[(int) code].const_p = false;
28798           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28799         }
28800     }

28802   return decl;
28803 }
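/* A minimal usage sketch (illustrative only; the mask, name, type and code
   below are example values taken from the description tables later in this
   file, not a quote of the initialization code): a builtin is registered
   roughly like this, and any builtin whose mask includes
   OPTION_MASK_ISA_64BIT is skipped on 32-bit targets by the check above.  */

  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_cvtss2si64",
               INT64_FTYPE_V4SF, IX86_BUILTIN_CVTSS2SI64);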
28805 /* Like def_builtin, but also marks the function decl "const". */
28807 static inline tree
28808 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28809                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28810 {
28811   tree decl = def_builtin (mask, name, tcode, code);
28812   if (decl)
28813     TREE_READONLY (decl) = 1;
28814   else
28815     ix86_builtins_isa[(int) code].const_p = true;

28817   return decl;
28818 }
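/* A minimal sketch (illustrative only; the loop-local names are assumptions
   made for the example): the description tables defined below, such as
   bdesc_args, are typically walked during builtin initialization and each
   named entry registered through def_builtin_const, so the decl is marked
   TREE_READONLY as soon as it can be built.  */

  {
    const struct builtin_description *d;
    size_t i;

    for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
      if (d->name != 0)
        def_builtin_const (d->mask, d->name,
                           (enum ix86_builtin_func_type) d->flag, d->code);
  }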
28820 /* Add any new builtin functions for a given ISA that may not have been
28821 declared. This saves a bit of space compared to adding all of the
28822 declarations to the tree, even if we didn't use them. */
28824 static void
28825 ix86_add_new_builtins (HOST_WIDE_INT isa)
28826 {
28827   int i;

28829   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28830     {
28831       if ((ix86_builtins_isa[i].isa & isa) != 0
28832           && ix86_builtins_isa[i].set_and_not_built_p)
28833         {
28834           tree decl, type;

28836           /* Don't define the builtin again. */
28837           ix86_builtins_isa[i].set_and_not_built_p = false;

28839           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28840           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28841                                                  type, i, BUILT_IN_MD, NULL,
28842                                                  NULL_TREE);

28844           ix86_builtins[i] = decl;
28845           if (ix86_builtins_isa[i].const_p)
28846             TREE_READONLY (decl) = 1;
28847         }
28848     }
28849 }
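/* A minimal sketch (illustrative only; the exact call site is an assumption):
   when function-specific target options enable additional ISA flags, the new
   flag mask is expected to be handed to this function so that any deferred
   builtin decls get built, along the lines of:  */

  ix86_add_new_builtins (ix86_isa_flags);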
28851 /* Bits for builtin_description.flag. */
28853 /* Set when we don't support the comparison natively, and should
28854 swap_comparison in order to support it. */
28855 #define BUILTIN_DESC_SWAP_OPERANDS 1
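/* A minimal sketch (illustrative only; the descriptor pointer D and the
   operand names are assumptions made for the example): expanders that
   consume these descriptions are expected to test this flag and swap the two
   source operands before emitting the comparison, roughly like this.  */

  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }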
28857 struct builtin_description
28858 {
28859   const HOST_WIDE_INT mask;
28860   const enum insn_code icode;
28861   const char *const name;
28862   const enum ix86_builtins code;
28863   const enum rtx_code comparison;
28864   const int flag;
28865 };

28867 static const struct builtin_description bdesc_comi[] =
28868 {
28869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28893 };

28895 static const struct builtin_description bdesc_pcmpestr[] =
28896 {
28897 /* SSE4.2 */
28898 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28899 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28900 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28901 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28902 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28903 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28904 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28905 };

28907 static const struct builtin_description bdesc_pcmpistr[] =
28908 {
28909 /* SSE4.2 */
28910 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28911 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28912 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28913 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28914 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28915 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28916 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28917 };

28919 /* Special builtins with variable number of arguments. */
28920 static const struct builtin_description bdesc_special_args[] =
28921 {
28922 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28923 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28924 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28926 /* 80387 (for use internally for atomic compound assignment). */
28927 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28928 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28929 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28930 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28932 /* MMX */
28933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28935 /* 3DNow! */
28936 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28938 /* FXSR, XSAVE and XSAVEOPT */
28939 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28940 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28941 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28942 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28943 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28945 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28946 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28947 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28948 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28949 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28951 /* SSE */
28952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28961 /* SSE or 3DNow!A */
28962 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28963 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28965 /* SSE2 */
28966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28973 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28980 /* SSE3 */
28981 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28983 /* SSE4.1 */
28984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28986 /* SSE4A */
28987 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28988 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28990 /* AVX */
28991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28994 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28995 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28996 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
28998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29021 /* AVX2 */
29022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29032 /* AVX512F */
29033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29081 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29082 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29083 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29084 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29085 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29086 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29088 /* FSGSBASE */
29089 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29090 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29091 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29092 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29093 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29094 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29095 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29096 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29098 /* RTM */
29099 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29100 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29101 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29102 };

29104 /* Builtins with variable number of arguments. */
29105 static const struct builtin_description bdesc_args[] =
29106 {
29107 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29108 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29109 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29110 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29111 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29113 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29115 /* MMX */
29116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29179 /* 3DNow! */
29180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29182 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29201 /* 3DNow!A */
29202 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29203 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29204 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29205 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29206 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29209 /* SSE */
29210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29212 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29214 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29218 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29221 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29225 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29226 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29227 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
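/* There is no separate greater-than compare pattern above: the cmpgt/cmpge
   entries reuse the LT/LE mask-compare insn, and the _SWAP suffix in the
   prototype tells the expander to exchange the two operands; the "not"
   variants (cmpnlt/cmpnle/cmpngt/cmpnge) use the unordered-inverse codes
   UNGE/UNGT.  A hedged sketch of how one of these builtins is normally
   reached from user code (illustration only, via the xmmintrin.h wrapper):

       __m128 a, b;
       __m128 m = (__m128) __builtin_ia32_cmpltps ((__v4sf) a, (__v4sf) b);

   which is what _mm_cmplt_ps expands to.  */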
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29256 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29262 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29263 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29270 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29271 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29275 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
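/* The _VEC_MERGE prototypes cover the scalar ops that only touch element 0;
   the builtin takes a single vector and the expander presumably reuses that
   operand as the vector supplying the untouched upper elements.  A hedged
   usage sketch (illustration only, via the xmmintrin.h wrapper):

       __m128 x;
       __m128 r = (__m128) __builtin_ia32_sqrtss ((__v4sf) x);

   which is what _mm_sqrt_ss expands to.  */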
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29286 /* SSE MMX or 3Dnow!A */
29287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29288 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29301 /* SSE2 */
29302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29308 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29320 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29321 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29325 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29327 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29328 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29329 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29358 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29364 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
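/* pmuludq/pmuludq128 are widening multiplies: the even unsigned 32-bit
   elements of each operand form full 64-bit products, hence the
   V2DI_FTYPE_V4SI_V4SI prototype.  A hedged sketch of the usual entry point
   (illustration only, via the emmintrin.h wrapper):

       __m128i a, b;
       __m128i prod = (__m128i) __builtin_ia32_pmuludq128 ((__v4si) a, (__v4si) b);

   which is what _mm_mul_epu32 expands to.  */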
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29438 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
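/* Two kinds of shift count appear above: the *_SI_COUNT prototypes take the
   count as a scalar (immediate or register), while the *_V8HI/V4SI/V2DI_COUNT
   forms take it in the low quadword of a vector, matching the register forms
   of psllw/pslld/psllq.  The INT_CONVERT entries (pslldqi128/psrldqi128)
   shift the whole register as a single 128-bit value; the emmintrin.h wrapper
   appears to pass the byte count already scaled to bits, roughly:

       __m128i v;
       __m128i s = (__m128i) __builtin_ia32_pslldqi128 ((__v2di) v, 4 * 8);

   which is approximately what _mm_slli_si128 (v, 4) expands to.  */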
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29471 /* SSE2 MMX */
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29475 /* SSE3 */
29476 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29477 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29479 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29480 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29483 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29486 /* SSSE3 */
29487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
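/* pshufb128 is the byte-granular table lookup, and the psign* entries copy,
   negate or zero each element of the first operand according to the sign of
   the corresponding element of the second.  A hedged sketch of the usual
   entry point (illustration only, via the tmmintrin.h wrapper):

       __m128i data, idx;
       __m128i shuf = (__m128i) __builtin_ia32_pshufb128 ((__v16qi) data, (__v16qi) idx);

   which is what _mm_shuffle_epi8 expands to.  */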
29519 /* SSSE3 palignr */
29520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29523 /* SSE4.1 */
29524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29562 /* SSE4.1 rounding and ptest */
29563 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29564 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29565 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29566 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29574 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29576 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29577 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29587 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29588 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29591 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29592 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
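/* The ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/ROUND_MXCSR values above travel in
   the slot that normally holds a comparison code; for the *_ROUND prototypes
   they become the rounding-mode immediate of the underlying roundpd/roundps
   pattern.  The ptest entries use EQ/LTU/GTU so that the expansion tests ZF
   (ptestz), CF (ptestc) or "neither flag set" (ptestnzc).  A hedged sketch of
   the usual entry point (illustration only, via the smmintrin.h wrapper):

       __m128i mask, val;
       int z = __builtin_ia32_ptestz128 ((__v2di) mask, (__v2di) val);

   which is what _mm_testz_si128 expands to.  */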
29594 /* SSE4.2 */
29595 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29596 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29597 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29598 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29599 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
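/* The crc32 builtins fold one more byte/word/dword/qword into a running
   CRC-32C accumulator.  A hedged sketch of the usual entry point
   (illustration only, via the smmintrin.h wrapper):

       unsigned int crc = 0;
       crc = __builtin_ia32_crc32qi (crc, 0x5a);

   which is what _mm_crc32_u8 expands to.  */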
29601 /* SSE4A */
29602 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29603 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29604 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29605 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29607 /* AES */
29608 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29609 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29612 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29614 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29616 /* PCLMUL */
29617 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
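/* The AES and PCLMUL rows carry a null name: the builtins themselves are
   presumably registered by hand elsewhere, and only the expansion data here
   is used.  A hedged sketch of the usual entry points (illustration only,
   via the wmmintrin.h wrappers):

       __m128i state, key;
       __m128i enc  = (__m128i) __builtin_ia32_aesenc128 ((__v2di) state, (__v2di) key);
       __m128i prod = (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di) state, (__v2di) key, 0);

   which is what _mm_aesenc_si128 and _mm_clmulepi64_si128 expand to.  */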
29619 /* AVX */
29620 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29621 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29624 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29625 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29628 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29634 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29635 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29636 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29637 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29638 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29639 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29640 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29641 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29642 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29643 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29644 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  /* AVX2 */
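  /* 256-bit integer builtins: pack/unpack, saturating arithmetic, compares,
     multiplies, shifts, shuffles, blends and broadcasts.  As above, each
     entry gives the enabling ISA mask, the insn pattern, the builtin name
     and enum, an optional rtx sub-code and the prototype index.  */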
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

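  /* LZCNT: leading-zero count on a 16-bit operand.  */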
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
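  /* Bit-manipulation builtins: BEXTR in 32-bit and 64-bit forms plus a
     16-bit trailing-zero count.  */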
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
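  /* Trailing-bit-manipulation BEXTRI builtins (bit-field extract with an
     immediate control operand).  */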
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
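  /* Half-precision conversions: VCVTPH2PS and VCVTPS2PH in 128-bit and
     256-bit forms.  */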
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
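  /* BZHI (zero high bits), PDEP (parallel deposit) and PEXT (parallel
     extract), each in 32-bit and 64-bit forms.  */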
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* AVX512F */
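  /* 512-bit EVEX builtins.  The "_mask" entries take a merge source and a
     QI/HI write mask as their trailing arguments (see the *_QI and *_HI
     prototypes); the "_maskz" entries zero the unselected elements
     instead.  */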
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30130 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30135 /* Mask arithmetic operations */
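/* These operate on the 16-bit mask (k) registers introduced with AVX512F,
   so they reuse the plain HImode logical patterns (andhi3, iorhi3, xorhi3,
   one_cmplhi2, ...) and take HI_FTYPE_* prototypes.  */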
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30147 /* SHA */
30148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30155 };
30157 /* Builtins with rounding support. */
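/* Each entry below follows the builtin_description layout used throughout
   this file: ISA option mask, insn code, builtin name, ix86_builtins enum
   value, rtx comparison code (UNKNOWN when unused) and prototype enum.
   For this table the trailing INT of each *_FTYPE_* prototype is the
   rounding-mode/SAE immediate; the intrinsic wrappers simply forward it as
   the last builtin argument (e.g. _mm512_add_round_pd passes its rounding
   argument through to __builtin_ia32_addpd512_mask).  */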
30158 static const struct builtin_description bdesc_round_args[] =
30159 {
30160 /* AVX512F */
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30180 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30182 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30189 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30191 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30241 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30243 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30245 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30247 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30249 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30251 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30253 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30255 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30281 /* AVX512ER */
30282 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30284 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30285 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30287 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30288 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30289 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30290 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30291 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30292 };
30294 /* FMA4 and XOP. */
30295 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30296 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30297 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30298 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30299 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30300 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30301 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30302 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30303 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30304 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30305 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30306 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30307 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30308 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30309 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30310 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30311 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30312 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30313 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30314 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30315 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30316 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30317 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30318 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30319 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30320 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30321 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30322 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30323 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30324 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30325 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30326 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30327 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30328 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30329 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30330 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30331 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30332 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30333 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30334 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30335 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30336 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30337 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30338 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30339 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30340 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30341 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30342 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30343 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30344 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30345 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30346 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
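/* The MULTI_ARG_* names above are just local shorthand for the generic
   *_FTYPE_* prototype enums.  Many of the XOP entries below also share a
   single insn pattern and differ only in the rtx comparison code (EQ, NE,
   LT, ...), which picks the condition when the builtin is expanded; e.g.
   vpcomeqb and vpcomneb both go through CODE_FOR_xop_maskcmpv16qi3.  */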
30348 static const struct builtin_description bdesc_multi_arg[] =
30349 {
30350 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30351 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30352 UNKNOWN, (int)MULTI_ARG_3_SF },
30353 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30354 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30355 UNKNOWN, (int)MULTI_ARG_3_DF },
30357 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30358 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30359 UNKNOWN, (int)MULTI_ARG_3_SF },
30360 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30361 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30362 UNKNOWN, (int)MULTI_ARG_3_DF },
30364 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30365 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30366 UNKNOWN, (int)MULTI_ARG_3_SF },
30367 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30368 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30369 UNKNOWN, (int)MULTI_ARG_3_DF },
30370 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30371 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30372 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30373 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30374 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30375 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30377 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30378 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30379 UNKNOWN, (int)MULTI_ARG_3_SF },
30380 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30381 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30382 UNKNOWN, (int)MULTI_ARG_3_DF },
30383 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30384 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30385 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30386 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30387 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30388 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30550 /* TM vector builtins. */
30552 /* Reuse the existing x86-specific `struct builtin_description' because
30553 it is convenient.  Add casts to make the entries fit. */
30554 static const struct builtin_description bdesc_tm[] =
30556 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30557 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30558 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30559 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30560 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30561 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30562 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30564 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30565 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30566 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30567 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30572 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30573 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30574 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30575 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30576 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30577 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30578 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30580 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30581 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30582 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30585 /* TM callbacks. */
30587 /* Return the builtin decl needed to load a vector of TYPE. */
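/* For illustration only: with the bdesc_tm table above, a 128-bit vector
   type such as V4SF maps to BUILT_IN_TM_LOAD_M128, i.e. the decl registered
   for "__builtin__ITM_RM128"; 64-bit and 256-bit vectors map to the M64 and
   M256 variants in the same way.  */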
30589 static tree
30590 ix86_builtin_tm_load (tree type)
30592 if (TREE_CODE (type) == VECTOR_TYPE)
30594 switch (tree_to_uhwi (TYPE_SIZE (type)))
30596 case 64:
30597 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30598 case 128:
30599 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30600 case 256:
30601 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30604 return NULL_TREE;
30607 /* Return the builtin decl needed to store a vector of TYPE. */
30609 static tree
30610 ix86_builtin_tm_store (tree type)
30612 if (TREE_CODE (type) == VECTOR_TYPE)
30614 switch (tree_to_uhwi (TYPE_SIZE (type)))
30616 case 64:
30617 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30618 case 128:
30619 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30620 case 256:
30621 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30624 return NULL_TREE;
30627 /* Initialize the transactional memory vector load/store builtins. */
30629 static void
30630 ix86_init_tm_builtins (void)
30632 enum ix86_builtin_func_type ftype;
30633 const struct builtin_description *d;
30634 size_t i;
30635 tree decl;
30636 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30637 tree attrs_log, attrs_type_log;
30639 if (!flag_tm)
30640 return;
30642 /* If there are no builtins defined, we must be compiling in a
30643 language without trans-mem support. */
30644 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30645 return;
30647 /* Use whatever attributes a normal TM load has. */
30648 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30649 attrs_load = DECL_ATTRIBUTES (decl);
30650 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30651 /* Use whatever attributes a normal TM store has. */
30652 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30653 attrs_store = DECL_ATTRIBUTES (decl);
30654 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30655 /* Use whatever attributes a normal TM log has. */
30656 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30657 attrs_log = DECL_ATTRIBUTES (decl);
30658 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30660 for (i = 0, d = bdesc_tm;
30661 i < ARRAY_SIZE (bdesc_tm);
30662 i++, d++)
30664 if ((d->mask & ix86_isa_flags) != 0
30665 || (lang_hooks.builtin_function
30666 == lang_hooks.builtin_function_ext_scope))
30668 tree type, attrs, attrs_type;
30669 enum built_in_function code = (enum built_in_function) d->code;
30671 ftype = (enum ix86_builtin_func_type) d->flag;
30672 type = ix86_get_builtin_func_type (ftype);
30674 if (BUILTIN_TM_LOAD_P (code))
30676 attrs = attrs_load;
30677 attrs_type = attrs_type_load;
30679 else if (BUILTIN_TM_STORE_P (code))
30681 attrs = attrs_store;
30682 attrs_type = attrs_type_store;
30684 else
30686 attrs = attrs_log;
30687 attrs_type = attrs_type_log;
30689 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30690 /* Pass the name without the "__builtin_" prefix so the
30691 function can also be called directly. */
30692 d->name + strlen ("__builtin_"),
30693 attrs);
30694 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30695 set the TYPE_ATTRIBUTES. */
30696 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30698 set_builtin_decl (code, decl, false);
30703 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30704 in the current target ISA, so that the user can compile particular modules
30705 with target-specific options that differ from the command-line
30706 options. */
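/* For illustration only: a registration such as the "__builtin_ia32_clflush"
   entry below makes that builtin callable whenever SSE2 is enabled; the
   intrinsic wrappers installed with the compiler (e.g. _mm_clflush in
   emmintrin.h) are thin functions that call these builtins.  */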
30707 static void
30708 ix86_init_mmx_sse_builtins (void)
30710 const struct builtin_description * d;
30711 enum ix86_builtin_func_type ftype;
30712 size_t i;
30714 /* Add all special builtins with variable number of operands. */
30715 for (i = 0, d = bdesc_special_args;
30716 i < ARRAY_SIZE (bdesc_special_args);
30717 i++, d++)
30719 if (d->name == 0)
30720 continue;
30722 ftype = (enum ix86_builtin_func_type) d->flag;
30723 def_builtin (d->mask, d->name, ftype, d->code);
30726 /* Add all builtins with variable number of operands. */
30727 for (i = 0, d = bdesc_args;
30728 i < ARRAY_SIZE (bdesc_args);
30729 i++, d++)
30731 if (d->name == 0)
30732 continue;
30734 ftype = (enum ix86_builtin_func_type) d->flag;
30735 def_builtin_const (d->mask, d->name, ftype, d->code);
30738 /* Add all builtins with rounding. */
30739 for (i = 0, d = bdesc_round_args;
30740 i < ARRAY_SIZE (bdesc_round_args);
30741 i++, d++)
30743 if (d->name == 0)
30744 continue;
30746 ftype = (enum ix86_builtin_func_type) d->flag;
30747 def_builtin_const (d->mask, d->name, ftype, d->code);
30750 /* pcmpestr[im] insns. */
30751 for (i = 0, d = bdesc_pcmpestr;
30752 i < ARRAY_SIZE (bdesc_pcmpestr);
30753 i++, d++)
30755 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30756 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30757 else
30758 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30759 def_builtin_const (d->mask, d->name, ftype, d->code);
30762 /* pcmpistr[im] insns. */
30763 for (i = 0, d = bdesc_pcmpistr;
30764 i < ARRAY_SIZE (bdesc_pcmpistr);
30765 i++, d++)
30767 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30768 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30769 else
30770 ftype = INT_FTYPE_V16QI_V16QI_INT;
30771 def_builtin_const (d->mask, d->name, ftype, d->code);
30774 /* comi/ucomi insns. */
30775 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30777 if (d->mask == OPTION_MASK_ISA_SSE2)
30778 ftype = INT_FTYPE_V2DF_V2DF;
30779 else
30780 ftype = INT_FTYPE_V4SF_V4SF;
30781 def_builtin_const (d->mask, d->name, ftype, d->code);
30784 /* SSE */
30785 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30786 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30787 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30788 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30790 /* SSE or 3DNow!A */
30791 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30792 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30793 IX86_BUILTIN_MASKMOVQ);
30795 /* SSE2 */
30796 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30797 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30799 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30800 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30801 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30802 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30804 /* SSE3. */
30805 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30806 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30807 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30808 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30810 /* AES */
30811 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30812 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30813 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30814 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30815 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30816 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30817 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30818 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30819 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30820 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30821 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30822 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30824 /* PCLMUL */
30825 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30826 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30828 /* RDRND */
30829 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30830 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30831 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30832 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30833 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30834 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30835 IX86_BUILTIN_RDRAND64_STEP);
30837 /* AVX2 */
30838 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30839 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30840 IX86_BUILTIN_GATHERSIV2DF);
30842 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30843 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30844 IX86_BUILTIN_GATHERSIV4DF);
30846 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30847 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30848 IX86_BUILTIN_GATHERDIV2DF);
30850 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30851 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30852 IX86_BUILTIN_GATHERDIV4DF);
30854 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30855 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30856 IX86_BUILTIN_GATHERSIV4SF);
30858 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30859 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30860 IX86_BUILTIN_GATHERSIV8SF);
30862 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30863 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30864 IX86_BUILTIN_GATHERDIV4SF);
30866 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30867 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30868 IX86_BUILTIN_GATHERDIV8SF);
30870 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30871 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30872 IX86_BUILTIN_GATHERSIV2DI);
30874 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30875 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30876 IX86_BUILTIN_GATHERSIV4DI);
30878 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30879 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30880 IX86_BUILTIN_GATHERDIV2DI);
30882 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30883 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30884 IX86_BUILTIN_GATHERDIV4DI);
30886 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30887 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30888 IX86_BUILTIN_GATHERSIV4SI);
30890 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30891 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30892 IX86_BUILTIN_GATHERSIV8SI);
30894 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30895 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30896 IX86_BUILTIN_GATHERDIV4SI);
30898 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30899 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30900 IX86_BUILTIN_GATHERDIV8SI);
30902 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30903 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30904 IX86_BUILTIN_GATHERALTSIV4DF);
30906 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30907 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30908 IX86_BUILTIN_GATHERALTDIV8SF);
30910 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30911 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30912 IX86_BUILTIN_GATHERALTSIV4DI);
30914 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30915 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30916 IX86_BUILTIN_GATHERALTDIV8SI);
30918 /* AVX512F */
30919 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30920 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30921 IX86_BUILTIN_GATHER3SIV16SF);
30923 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30924 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30925 IX86_BUILTIN_GATHER3SIV8DF);
30927 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30928 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30929 IX86_BUILTIN_GATHER3DIV16SF);
30931 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30932 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30933 IX86_BUILTIN_GATHER3DIV8DF);
30935 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30936 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30937 IX86_BUILTIN_GATHER3SIV16SI);
30939 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30940 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30941 IX86_BUILTIN_GATHER3SIV8DI);
30943 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30944 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30945 IX86_BUILTIN_GATHER3DIV16SI);
30947 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30948 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30949 IX86_BUILTIN_GATHER3DIV8DI);
30951 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30952 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30953 IX86_BUILTIN_GATHER3ALTSIV8DF);
30955 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30956 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30957 IX86_BUILTIN_GATHER3ALTDIV16SF);
30959 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30960 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30961 IX86_BUILTIN_GATHER3ALTSIV8DI);
30963 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30964 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30965 IX86_BUILTIN_GATHER3ALTDIV16SI);
30967 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30968 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30969 IX86_BUILTIN_SCATTERSIV16SF);
30971 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30972 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30973 IX86_BUILTIN_SCATTERSIV8DF);
30975 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30976 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30977 IX86_BUILTIN_SCATTERDIV16SF);
30979 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30980 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30981 IX86_BUILTIN_SCATTERDIV8DF);
30983 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30984 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30985 IX86_BUILTIN_SCATTERSIV16SI);
30987 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30988 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30989 IX86_BUILTIN_SCATTERSIV8DI);
30991 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30992 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30993 IX86_BUILTIN_SCATTERDIV16SI);
30995 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30996 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
30997 IX86_BUILTIN_SCATTERDIV8DI);
30999 /* AVX512PF */
31000 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31001 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31002 IX86_BUILTIN_GATHERPFDPD);
31003 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31004 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31005 IX86_BUILTIN_GATHERPFDPS);
31006 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31007 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31008 IX86_BUILTIN_GATHERPFQPD);
31009 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31010 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31011 IX86_BUILTIN_GATHERPFQPS);
31012 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31013 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31014 IX86_BUILTIN_SCATTERPFDPD);
31015 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31016 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31017 IX86_BUILTIN_SCATTERPFDPS);
31018 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31019 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31020 IX86_BUILTIN_SCATTERPFQPD);
31021 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31022 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31023 IX86_BUILTIN_SCATTERPFQPS);
31025 /* SHA */
31026 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31027 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31028 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31029 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31030 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31031 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31032 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31033 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31034 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31035 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31036 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31037 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31038 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31039 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31041 /* RTM. */
31042 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31043 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31045 /* MMX access to the vec_init patterns. */
31046 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31047 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31049 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31050 V4HI_FTYPE_HI_HI_HI_HI,
31051 IX86_BUILTIN_VEC_INIT_V4HI);
31053 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31054 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31055 IX86_BUILTIN_VEC_INIT_V8QI);
31057 /* Access to the vec_extract patterns. */
31058 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31059 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31060 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31061 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31062 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31063 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31064 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31065 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31066 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31067 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31069 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31070 "__builtin_ia32_vec_ext_v4hi",
31071 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31073 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31074 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31076 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31077 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31079 /* Access to the vec_set patterns. */
31080 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31081 "__builtin_ia32_vec_set_v2di",
31082 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31084 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31085 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31087 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31088 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31090 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31091 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31093 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31094 "__builtin_ia32_vec_set_v4hi",
31095 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31097 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31098 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31100 /* RDSEED */
31101 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31102 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31103 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31104 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31105 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31106 "__builtin_ia32_rdseed_di_step",
31107 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31109 /* ADCX */
31110 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31111 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31112 def_builtin (OPTION_MASK_ISA_64BIT,
31113 "__builtin_ia32_addcarryx_u64",
31114 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31115 IX86_BUILTIN_ADDCARRYX64);
31117 /* Read/write FLAGS. */
31118 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31119 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31120 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31121 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31122 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31123 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31124 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31125 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31128 /* Add FMA4 and XOP multi-argument builtin instructions.  */
31129 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31131 if (d->name == 0)
31132 continue;
31134 ftype = (enum ix86_builtin_func_type) d->flag;
31135 def_builtin_const (d->mask, d->name, ftype, d->code);
31139 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31140 to return a pointer to VERSION_DECL if the outcome of the expression
31141 formed by PREDICATE_CHAIN is true. This function will be called during
31142 version dispatch to decide which function version to execute. It returns
31143 the basic block at the end, to which more conditions can be added. */
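/* A rough sketch (illustrative only) of the GIMPLE appended to NEW_BB for a
   version guarded by a single predicate:

     cond = __builtin_cpu_supports ("avx");
     if (cond > 0)
       {
         tmp = (void *) &version_decl;
         return tmp;
       }
     else
       fall through to the conditions added by later calls

   When PREDICATE_CHAIN has several entries, the individual results are
   combined with MIN_EXPR before the comparison, as done below.  */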
31145 static basic_block
31146 add_condition_to_bb (tree function_decl, tree version_decl,
31147 tree predicate_chain, basic_block new_bb)
31149 gimple return_stmt;
31150 tree convert_expr, result_var;
31151 gimple convert_stmt;
31152 gimple call_cond_stmt;
31153 gimple if_else_stmt;
31155 basic_block bb1, bb2, bb3;
31156 edge e12, e23;
31158 tree cond_var, and_expr_var = NULL_TREE;
31159 gimple_seq gseq;
31161 tree predicate_decl, predicate_arg;
31163 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31165 gcc_assert (new_bb != NULL);
31166 gseq = bb_seq (new_bb);
31169 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31170 build_fold_addr_expr (version_decl));
31171 result_var = create_tmp_var (ptr_type_node, NULL);
31172 convert_stmt = gimple_build_assign (result_var, convert_expr);
31173 return_stmt = gimple_build_return (result_var);
31175 if (predicate_chain == NULL_TREE)
31177 gimple_seq_add_stmt (&gseq, convert_stmt);
31178 gimple_seq_add_stmt (&gseq, return_stmt);
31179 set_bb_seq (new_bb, gseq);
31180 gimple_set_bb (convert_stmt, new_bb);
31181 gimple_set_bb (return_stmt, new_bb);
31182 pop_cfun ();
31183 return new_bb;
31186 while (predicate_chain != NULL)
31188 cond_var = create_tmp_var (integer_type_node, NULL);
31189 predicate_decl = TREE_PURPOSE (predicate_chain);
31190 predicate_arg = TREE_VALUE (predicate_chain);
31191 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31192 gimple_call_set_lhs (call_cond_stmt, cond_var);
31194 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31195 gimple_set_bb (call_cond_stmt, new_bb);
31196 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31198 predicate_chain = TREE_CHAIN (predicate_chain);
31200 if (and_expr_var == NULL)
31201 and_expr_var = cond_var;
31202 else
31204 gimple assign_stmt;
31205 /* Use MIN_EXPR to check whether any of the integers is zero:
31206 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
31207 assign_stmt = gimple_build_assign (and_expr_var,
31208 build2 (MIN_EXPR, integer_type_node,
31209 cond_var, and_expr_var));
31211 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31212 gimple_set_bb (assign_stmt, new_bb);
31213 gimple_seq_add_stmt (&gseq, assign_stmt);
31217 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31218 integer_zero_node,
31219 NULL_TREE, NULL_TREE);
31220 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31221 gimple_set_bb (if_else_stmt, new_bb);
31222 gimple_seq_add_stmt (&gseq, if_else_stmt);
31224 gimple_seq_add_stmt (&gseq, convert_stmt);
31225 gimple_seq_add_stmt (&gseq, return_stmt);
31226 set_bb_seq (new_bb, gseq);
31228 bb1 = new_bb;
31229 e12 = split_block (bb1, if_else_stmt);
31230 bb2 = e12->dest;
31231 e12->flags &= ~EDGE_FALLTHRU;
31232 e12->flags |= EDGE_TRUE_VALUE;
31234 e23 = split_block (bb2, return_stmt);
31236 gimple_set_bb (convert_stmt, bb2);
31237 gimple_set_bb (return_stmt, bb2);
31239 bb3 = e23->dest;
31240 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31242 remove_edge (e23);
31243 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31245 pop_cfun ();
31247 return bb3;
31250 /* This parses the attribute arguments to target in DECL and determines
31251 the right builtin to use to match the platform specification.
31252 It returns the priority value for this version decl. If PREDICATE_LIST
31253 is not NULL, it stores the list of cpu features that need to be checked
31254 before dispatching this function. */
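/* Two illustrative outcomes, following the tables below: a version declared
   with __attribute__ ((target ("sse4.2"))) gets a
   __builtin_cpu_supports ("sse4.2") entry in the predicate list and priority
   P_SSE4_2, while target ("arch=core2") gets a __builtin_cpu_is ("core2")
   entry and priority P_PROC_SSSE3.  */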
31256 static unsigned int
31257 get_builtin_code_for_version (tree decl, tree *predicate_list)
31259 tree attrs;
31260 struct cl_target_option cur_target;
31261 tree target_node;
31262 struct cl_target_option *new_target;
31263 const char *arg_str = NULL;
31264 const char *attrs_str = NULL;
31265 char *tok_str = NULL;
31266 char *token;
31268 /* Priority of i386 features, greater value is higher priority. This is
31269 used to decide the order in which function dispatch must happen. For
31270 instance, a version specialized for SSE4.2 should be checked for dispatch
31271 before a version for SSE3, as SSE4.2 implies SSE3. */
31272 enum feature_priority
31274 P_ZERO = 0,
31275 P_MMX,
31276 P_SSE,
31277 P_SSE2,
31278 P_SSE3,
31279 P_SSSE3,
31280 P_PROC_SSSE3,
31281 P_SSE4_A,
31282 P_PROC_SSE4_A,
31283 P_SSE4_1,
31284 P_SSE4_2,
31285 P_PROC_SSE4_2,
31286 P_POPCNT,
31287 P_AVX,
31288 P_PROC_AVX,
31289 P_FMA4,
31290 P_XOP,
31291 P_PROC_XOP,
31292 P_FMA,
31293 P_PROC_FMA,
31294 P_AVX2,
31295 P_PROC_AVX2
31298 enum feature_priority priority = P_ZERO;
31300 /* These are the target attribute strings for which a dispatcher is
31301 available, from fold_builtin_cpu. */
31303 static struct _feature_list
31305 const char *const name;
31306 const enum feature_priority priority;
31308 const feature_list[] =
31310 {"mmx", P_MMX},
31311 {"sse", P_SSE},
31312 {"sse2", P_SSE2},
31313 {"sse3", P_SSE3},
31314 {"sse4a", P_SSE4_A},
31315 {"ssse3", P_SSSE3},
31316 {"sse4.1", P_SSE4_1},
31317 {"sse4.2", P_SSE4_2},
31318 {"popcnt", P_POPCNT},
31319 {"avx", P_AVX},
31320 {"fma4", P_FMA4},
31321 {"xop", P_XOP},
31322 {"fma", P_FMA},
31323 {"avx2", P_AVX2}
31327 static unsigned int NUM_FEATURES
31328 = sizeof (feature_list) / sizeof (struct _feature_list);
31330 unsigned int i;
31332 tree predicate_chain = NULL_TREE;
31333 tree predicate_decl, predicate_arg;
31335 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31336 gcc_assert (attrs != NULL);
31338 attrs = TREE_VALUE (TREE_VALUE (attrs));
31340 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31341 attrs_str = TREE_STRING_POINTER (attrs);
31343 /* Return priority zero for default function. */
31344 if (strcmp (attrs_str, "default") == 0)
31345 return 0;
31347 /* Handle arch= if specified. For priority, set it to be 1 more than
31348 the best instruction set the processor can handle. For instance, if
31349 there is a version for atom and a version for ssse3 (the highest ISA
31350 priority for atom), the atom version must be checked for dispatch
31351 before the ssse3 version. */
31352 if (strstr (attrs_str, "arch=") != NULL)
31354 cl_target_option_save (&cur_target, &global_options);
31355 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31356 &global_options_set);
31358 gcc_assert (target_node);
31359 new_target = TREE_TARGET_OPTION (target_node);
31360 gcc_assert (new_target);
31362 if (new_target->arch_specified && new_target->arch > 0)
31364 switch (new_target->arch)
31366 case PROCESSOR_CORE2:
31367 arg_str = "core2";
31368 priority = P_PROC_SSSE3;
31369 break;
31370 case PROCESSOR_NEHALEM:
31371 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31372 arg_str = "westmere";
31373 else
31374 /* We translate "arch=corei7" and "arch=nehalem" to
31375 "corei7" so that it will be mapped to M_INTEL_COREI7
31376 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31377 arg_str = "corei7";
31378 priority = P_PROC_SSE4_2;
31379 break;
31380 case PROCESSOR_SANDYBRIDGE:
31381 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31382 arg_str = "ivybridge";
31383 else
31384 arg_str = "sandybridge";
31385 priority = P_PROC_AVX;
31386 break;
31387 case PROCESSOR_HASWELL:
31388 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31389 arg_str = "broadwell";
31390 else
31391 arg_str = "haswell";
31392 priority = P_PROC_AVX2;
31393 break;
31394 case PROCESSOR_BONNELL:
31395 arg_str = "bonnell";
31396 priority = P_PROC_SSSE3;
31397 break;
31398 case PROCESSOR_SILVERMONT:
31399 arg_str = "silvermont";
31400 priority = P_PROC_SSE4_2;
31401 break;
31402 case PROCESSOR_AMDFAM10:
31403 arg_str = "amdfam10h";
31404 priority = P_PROC_SSE4_A;
31405 break;
31406 case PROCESSOR_BTVER1:
31407 arg_str = "btver1";
31408 priority = P_PROC_SSE4_A;
31409 break;
31410 case PROCESSOR_BTVER2:
31411 arg_str = "btver2";
31412 priority = P_PROC_AVX;
31413 break;
31414 case PROCESSOR_BDVER1:
31415 arg_str = "bdver1";
31416 priority = P_PROC_XOP;
31417 break;
31418 case PROCESSOR_BDVER2:
31419 arg_str = "bdver2";
31420 priority = P_PROC_FMA;
31421 break;
31422 case PROCESSOR_BDVER3:
31423 arg_str = "bdver3";
31424 priority = P_PROC_FMA;
31425 break;
31426 case PROCESSOR_BDVER4:
31427 arg_str = "bdver4";
31428 priority = P_PROC_AVX2;
31429 break;
31433 cl_target_option_restore (&global_options, &cur_target);
31435 if (predicate_list && arg_str == NULL)
31437 error_at (DECL_SOURCE_LOCATION (decl),
31438 "No dispatcher found for the versioning attributes");
31439 return 0;
31442 if (predicate_list)
31444 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31445 /* For a C string literal the length includes the trailing NULL. */
31446 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31447 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31448 predicate_chain);
31452 /* Process feature name. */
31453 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31454 strcpy (tok_str, attrs_str);
31455 token = strtok (tok_str, ",");
31456 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31458 while (token != NULL)
31460 /* Do not process "arch=" */
31461 if (strncmp (token, "arch=", 5) == 0)
31463 token = strtok (NULL, ",");
31464 continue;
31466 for (i = 0; i < NUM_FEATURES; ++i)
31468 if (strcmp (token, feature_list[i].name) == 0)
31470 if (predicate_list)
31472 predicate_arg = build_string_literal (
31473 strlen (feature_list[i].name) + 1,
31474 feature_list[i].name);
31475 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31476 predicate_chain);
31478 /* Find the maximum priority feature. */
31479 if (feature_list[i].priority > priority)
31480 priority = feature_list[i].priority;
31482 break;
31485 if (predicate_list && i == NUM_FEATURES)
31487 error_at (DECL_SOURCE_LOCATION (decl),
31488 "No dispatcher found for %s", token);
31489 return 0;
31491 token = strtok (NULL, ",");
31493 free (tok_str);
31495 if (predicate_list && predicate_chain == NULL_TREE)
31497 error_at (DECL_SOURCE_LOCATION (decl),
31498 "No dispatcher found for the versioning attributes : %s",
31499 attrs_str);
31500 return 0;
31502 else if (predicate_list)
31504 predicate_chain = nreverse (predicate_chain);
31505 *predicate_list = predicate_chain;
31508 return priority;
31511 /* This compares the priority of target features in function DECL1
31512 and DECL2. It returns a positive value if DECL1 has higher priority,
31513 a negative value if DECL2 has higher priority, and 0 if they are the
31514 same. */
31516 static int
31517 ix86_compare_version_priority (tree decl1, tree decl2)
31519 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31520 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31522 return (int)priority1 - (int)priority2;
31525 /* V1 and V2 point to function versions with different priorities
31526 based on the target ISA. This function compares their priorities. */
31528 static int
31529 feature_compare (const void *v1, const void *v2)
31531 typedef struct _function_version_info
31533 tree version_decl;
31534 tree predicate_chain;
31535 unsigned int dispatch_priority;
31536 } function_version_info;
31538 const function_version_info c1 = *(const function_version_info *)v1;
31539 const function_version_info c2 = *(const function_version_info *)v2;
31540 return (c2.dispatch_priority - c1.dispatch_priority);
31543 /* This function generates the dispatch function for
31544 multi-versioned functions. DISPATCH_DECL is the function which will
31545 contain the dispatch logic. FNDECLS holds the function choices for
31546 dispatch and is actually a vector of decls. EMPTY_BB is the basic block pointer
31547 in DISPATCH_DECL in which the dispatch code is generated. */
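/* Roughly, the generated body looks like: a call to __builtin_cpu_init,
   followed by one add_condition_to_bb block per non-default version in
   decreasing priority order, and finally an unconditional block that returns
   the default version (a sketch only; the actual control flow is built
   below).  */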
31549 static int
31550 dispatch_function_versions (tree dispatch_decl,
31551 void *fndecls_p,
31552 basic_block *empty_bb)
31554 tree default_decl;
31555 gimple ifunc_cpu_init_stmt;
31556 gimple_seq gseq;
31557 int ix;
31558 tree ele;
31559 vec<tree> *fndecls;
31560 unsigned int num_versions = 0;
31561 unsigned int actual_versions = 0;
31562 unsigned int i;
31564 struct _function_version_info
31566 tree version_decl;
31567 tree predicate_chain;
31568 unsigned int dispatch_priority;
31569 }*function_version_info;
31571 gcc_assert (dispatch_decl != NULL
31572 && fndecls_p != NULL
31573 && empty_bb != NULL);
31575 /* fndecls_p is actually a vector.  */
31576 fndecls = static_cast<vec<tree> *> (fndecls_p);
31578 /* At least one more version other than the default. */
31579 num_versions = fndecls->length ();
31580 gcc_assert (num_versions >= 2);
31582 function_version_info = (struct _function_version_info *)
31583 XNEWVEC (struct _function_version_info, (num_versions - 1));
31585 /* The first version in the vector is the default decl. */
31586 default_decl = (*fndecls)[0];
31588 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31590 gseq = bb_seq (*empty_bb);
31591 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31592 constructors, so explicitly call __builtin_cpu_init here. */
31593 ifunc_cpu_init_stmt = gimple_build_call_vec (
31594 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31595 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31596 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31597 set_bb_seq (*empty_bb, gseq);
31599 pop_cfun ();
31602 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31604 tree version_decl = ele;
31605 tree predicate_chain = NULL_TREE;
31606 unsigned int priority;
31607 /* Get attribute string, parse it and find the right predicate decl.
31608 The predicate function could be a lengthy combination of many
31609 features, like arch-type and various isa-variants. */
31610 priority = get_builtin_code_for_version (version_decl,
31611 &predicate_chain);
31613 if (predicate_chain == NULL_TREE)
31614 continue;
31616 function_version_info [actual_versions].version_decl = version_decl;
31617 function_version_info [actual_versions].predicate_chain
31618 = predicate_chain;
31619 function_version_info [actual_versions].dispatch_priority = priority;
31620 actual_versions++;
31623 /* Sort the versions according to descending order of dispatch priority. The
31624 priority is based on the ISA. This is not a perfect solution. There
31625 could still be ambiguity. If more than one function version is suitable
31626 to execute, which one should be dispatched? In future, allow the user
31627 to specify a dispatch priority next to the version. */
31628 qsort (function_version_info, actual_versions,
31629 sizeof (struct _function_version_info), feature_compare);
31631 for (i = 0; i < actual_versions; ++i)
31632 *empty_bb = add_condition_to_bb (dispatch_decl,
31633 function_version_info[i].version_decl,
31634 function_version_info[i].predicate_chain,
31635 *empty_bb);
31637 /* Dispatch the default version at the end.  */
31638 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31639 NULL, *empty_bb);
31641 free (function_version_info);
31642 return 0;
31645 /* Comparator function to be used in qsort routine to sort attribute
31646 specification strings to "target". */
31648 static int
31649 attr_strcmp (const void *v1, const void *v2)
31651 const char *c1 = *(char *const*)v1;
31652 const char *c2 = *(char *const*)v2;
31653 return strcmp (c1, c2);
31656 /* ARGLIST is the argument to target attribute. This function tokenizes
31657 the comma separated arguments, sorts them and returns a string which
31658 is a unique identifier for the comma separated arguments. It also
31659 replaces non-identifier characters "=,-" with "_". */
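/* For example (illustrative): the argument of
   __attribute__ ((target ("avx,arch=core2"))) becomes "arch_core2_avx":
   '=' is rewritten to '_', the comma-separated tokens are sorted, and the
   result is rejoined with '_'.  */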
31661 static char *
31662 sorted_attr_string (tree arglist)
31664 tree arg;
31665 size_t str_len_sum = 0;
31666 char **args = NULL;
31667 char *attr_str, *ret_str;
31668 char *attr = NULL;
31669 unsigned int argnum = 1;
31670 unsigned int i;
31672 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31674 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31675 size_t len = strlen (str);
31676 str_len_sum += len + 1;
31677 if (arg != arglist)
31678 argnum++;
31679 for (i = 0; i < strlen (str); i++)
31680 if (str[i] == ',')
31681 argnum++;
31684 attr_str = XNEWVEC (char, str_len_sum);
31685 str_len_sum = 0;
31686 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31688 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31689 size_t len = strlen (str);
31690 memcpy (attr_str + str_len_sum, str, len);
31691 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31692 str_len_sum += len + 1;
31695 /* Replace "=,-" with "_". */
31696 for (i = 0; i < strlen (attr_str); i++)
31697 if (attr_str[i] == '=' || attr_str[i]== '-')
31698 attr_str[i] = '_';
31700 if (argnum == 1)
31701 return attr_str;
31703 args = XNEWVEC (char *, argnum);
31705 i = 0;
31706 attr = strtok (attr_str, ",");
31707 while (attr != NULL)
31709 args[i] = attr;
31710 i++;
31711 attr = strtok (NULL, ",");
31714 qsort (args, argnum, sizeof (char *), attr_strcmp);
31716 ret_str = XNEWVEC (char, str_len_sum);
31717 str_len_sum = 0;
31718 for (i = 0; i < argnum; i++)
31720 size_t len = strlen (args[i]);
31721 memcpy (ret_str + str_len_sum, args[i], len);
31722 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31723 str_len_sum += len + 1;
31726 XDELETEVEC (args);
31727 XDELETEVEC (attr_str);
31728 return ret_str;
31731 /* This function changes the assembler name for functions that are
31732 versions. If DECL is a function version and has a "target"
31733 attribute, it appends the attribute string to its assembler name. */
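/* For example (illustrative): a version of "foo" declared with
   __attribute__ ((target ("avx"))) gets the assembler name "foo.avx"; the
   default version keeps its original name.  */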
31735 static tree
31736 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31738 tree version_attr;
31739 const char *orig_name, *version_string;
31740 char *attr_str, *assembler_name;
31742 if (DECL_DECLARED_INLINE_P (decl)
31743 && lookup_attribute ("gnu_inline",
31744 DECL_ATTRIBUTES (decl)))
31745 error_at (DECL_SOURCE_LOCATION (decl),
31746 "Function versions cannot be marked as gnu_inline,"
31747 " bodies have to be generated");
31749 if (DECL_VIRTUAL_P (decl)
31750 || DECL_VINDEX (decl))
31751 sorry ("Virtual function multiversioning not supported");
31753 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31755 /* target attribute string cannot be NULL. */
31756 gcc_assert (version_attr != NULL_TREE);
31758 orig_name = IDENTIFIER_POINTER (id);
31759 version_string
31760 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31762 if (strcmp (version_string, "default") == 0)
31763 return id;
31765 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31766 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31768 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31770 /* Allow assembler name to be modified if already set. */
31771 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31772 SET_DECL_RTL (decl, NULL);
31774 tree ret = get_identifier (assembler_name);
31775 XDELETEVEC (attr_str);
31776 XDELETEVEC (assembler_name);
31777 return ret;
31780 /* This function returns true if FN1 and FN2 are versions of the same function,
31781 that is, the target strings of the function decls are different. This assumes
31782 that FN1 and FN2 have the same signature. */
31784 static bool
31785 ix86_function_versions (tree fn1, tree fn2)
31787 tree attr1, attr2;
31788 char *target1, *target2;
31789 bool result;
31791 if (TREE_CODE (fn1) != FUNCTION_DECL
31792 || TREE_CODE (fn2) != FUNCTION_DECL)
31793 return false;
31795 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31796 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31798 /* At least one function decl should have the target attribute specified. */
31799 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31800 return false;
31802 /* Diagnose missing target attribute if one of the decls is already
31803 multi-versioned. */
31804 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31806 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31808 if (attr2 != NULL_TREE)
31810 tree tem = fn1;
31811 fn1 = fn2;
31812 fn2 = tem;
31813 attr1 = attr2;
31815 error_at (DECL_SOURCE_LOCATION (fn2),
31816 "missing %<target%> attribute for multi-versioned %D",
31817 fn2);
31818 inform (DECL_SOURCE_LOCATION (fn1),
31819 "previous declaration of %D", fn1);
31820 /* Prevent diagnosing of the same error multiple times. */
31821 DECL_ATTRIBUTES (fn2)
31822 = tree_cons (get_identifier ("target"),
31823 copy_node (TREE_VALUE (attr1)),
31824 DECL_ATTRIBUTES (fn2));
31826 return false;
31829 target1 = sorted_attr_string (TREE_VALUE (attr1));
31830 target2 = sorted_attr_string (TREE_VALUE (attr2));
31832 /* The sorted target strings must be different for fn1 and fn2
31833 to be versions. */
31834 if (strcmp (target1, target2) == 0)
31835 result = false;
31836 else
31837 result = true;
31839 XDELETEVEC (target1);
31840 XDELETEVEC (target2);
31842 return result;
31845 static tree
31846 ix86_mangle_decl_assembler_name (tree decl, tree id)
31848 /* For a function version, add the target suffix to the assembler name.  */
31849 if (TREE_CODE (decl) == FUNCTION_DECL
31850 && DECL_FUNCTION_VERSIONED (decl))
31851 id = ix86_mangle_function_version_assembler_name (decl, id);
31852 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31853 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31854 #endif
31856 return id;
31859 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31860 is true, append the full path name of the source file. */
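/* For example (illustrative): for a decl named "foo",
   make_name (decl, "resolver", false) yields "foo.resolver", while passing
   true for MAKE_UNIQUE inserts the file-scope unique name in the middle,
   e.g. "foo.<unique>.resolver".  */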
31862 static char *
31863 make_name (tree decl, const char *suffix, bool make_unique)
31865 char *global_var_name;
31866 int name_len;
31867 const char *name;
31868 const char *unique_name = NULL;
31870 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31872 /* Get a unique name that can be used globally without any chances
31873 of collision at link time. */
31874 if (make_unique)
31875 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31877 name_len = strlen (name) + strlen (suffix) + 2;
31879 if (make_unique)
31880 name_len += strlen (unique_name) + 1;
31881 global_var_name = XNEWVEC (char, name_len);
31883 /* Use '.' to concatenate names as it is demangler friendly. */
31884 if (make_unique)
31885 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31886 suffix);
31887 else
31888 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31890 return global_var_name;
31893 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31895 /* Make a dispatcher declaration for the multi-versioned function DECL.
31896 Calls to DECL function will be replaced with calls to the dispatcher
31897 by the front-end. Return the decl created. */
31899 static tree
31900 make_dispatcher_decl (const tree decl)
31902 tree func_decl;
31903 char *func_name;
31904 tree fn_type, func_type;
31905 bool is_uniq = false;
31907 if (TREE_PUBLIC (decl) == 0)
31908 is_uniq = true;
31910 func_name = make_name (decl, "ifunc", is_uniq);
31912 fn_type = TREE_TYPE (decl);
31913 func_type = build_function_type (TREE_TYPE (fn_type),
31914 TYPE_ARG_TYPES (fn_type));
31916 func_decl = build_fn_decl (func_name, func_type);
31917 XDELETEVEC (func_name);
31918 TREE_USED (func_decl) = 1;
31919 DECL_CONTEXT (func_decl) = NULL_TREE;
31920 DECL_INITIAL (func_decl) = error_mark_node;
31921 DECL_ARTIFICIAL (func_decl) = 1;
31922 /* Mark this function as external; the resolver will flip it again if
31923 it gets generated. */
31924 DECL_EXTERNAL (func_decl) = 1;
31925 /* IFUNCs have to be externally visible, so make this decl public.  */
31926 TREE_PUBLIC (func_decl) = 1;
31928 return func_decl;
31931 #endif
31933 /* Returns true if DECL is multi-versioned and is the default function,
31934 that is, it is not tagged with a target-specific option.  */
31936 static bool
31937 is_function_default_version (const tree decl)
31939 if (TREE_CODE (decl) != FUNCTION_DECL
31940 || !DECL_FUNCTION_VERSIONED (decl))
31941 return false;
31942 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31943 gcc_assert (attr);
31944 attr = TREE_VALUE (TREE_VALUE (attr));
31945 return (TREE_CODE (attr) == STRING_CST
31946 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31949 /* Make a dispatcher declaration for the multi-versioned function DECL.
31950 Calls to DECL function will be replaced with calls to the dispatcher
31951 by the front-end. Returns the decl of the dispatcher function. */
31953 static tree
31954 ix86_get_function_versions_dispatcher (void *decl)
31956 tree fn = (tree) decl;
31957 struct cgraph_node *node = NULL;
31958 struct cgraph_node *default_node = NULL;
31959 struct cgraph_function_version_info *node_v = NULL;
31960 struct cgraph_function_version_info *first_v = NULL;
31962 tree dispatch_decl = NULL;
31964 struct cgraph_function_version_info *default_version_info = NULL;
31966 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31968 node = cgraph_get_node (fn);
31969 gcc_assert (node != NULL);
31971 node_v = get_cgraph_node_version (node);
31972 gcc_assert (node_v != NULL);
31974 if (node_v->dispatcher_resolver != NULL)
31975 return node_v->dispatcher_resolver;
31977 /* Find the default version and make it the first node. */
31978 first_v = node_v;
31979 /* Go to the beginning of the chain. */
31980 while (first_v->prev != NULL)
31981 first_v = first_v->prev;
31982 default_version_info = first_v;
31983 while (default_version_info != NULL)
31985 if (is_function_default_version
31986 (default_version_info->this_node->decl))
31987 break;
31988 default_version_info = default_version_info->next;
31991 /* If there is no default node, just return NULL. */
31992 if (default_version_info == NULL)
31993 return NULL;
31995 /* Make default info the first node. */
31996 if (first_v != default_version_info)
31998 default_version_info->prev->next = default_version_info->next;
31999 if (default_version_info->next)
32000 default_version_info->next->prev = default_version_info->prev;
32001 first_v->prev = default_version_info;
32002 default_version_info->next = first_v;
32003 default_version_info->prev = NULL;
32006 default_node = default_version_info->this_node;
32008 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32009 if (targetm.has_ifunc_p ())
32011 struct cgraph_function_version_info *it_v = NULL;
32012 struct cgraph_node *dispatcher_node = NULL;
32013 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32015 /* Right now, the dispatching is done via ifunc. */
32016 dispatch_decl = make_dispatcher_decl (default_node->decl);
32018 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32019 gcc_assert (dispatcher_node != NULL);
32020 dispatcher_node->dispatcher_function = 1;
32021 dispatcher_version_info
32022 = insert_new_cgraph_node_version (dispatcher_node);
32023 dispatcher_version_info->next = default_version_info;
32024 dispatcher_node->definition = 1;
32026 /* Set the dispatcher for all the versions. */
32027 it_v = default_version_info;
32028 while (it_v != NULL)
32030 it_v->dispatcher_resolver = dispatch_decl;
32031 it_v = it_v->next;
32034 else
32035 #endif
32037 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32038 "multiversioning needs ifunc which is not supported "
32039 "on this target");
32042 return dispatch_decl;
32045 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32046 it to CHAIN. */
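/* For example (illustrative): make_attribute ("ifunc", "foo.resolver",
   NULL_TREE) builds the tree form of __attribute__ ((ifunc ("foo.resolver")));
   it is used below to tie the dispatcher decl to its resolver.  */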
32048 static tree
32049 make_attribute (const char *name, const char *arg_name, tree chain)
32051 tree attr_name;
32052 tree attr_arg_name;
32053 tree attr_args;
32054 tree attr;
32056 attr_name = get_identifier (name);
32057 attr_arg_name = build_string (strlen (arg_name), arg_name);
32058 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32059 attr = tree_cons (attr_name, attr_args, chain);
32060 return attr;
32063 /* Make the resolver function decl to dispatch the versions of
32064 a multi-versioned function, DEFAULT_DECL. Create an
32065 empty basic block in the resolver and store the pointer in
32066 EMPTY_BB. Return the decl of the resolver function. */
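/* An illustrative sketch of the pieces set up here for a versioned function
   "foo": the resolver is named "foo.resolver" (see make_name above),
   DISPATCH_DECL is marked __attribute__ ((ifunc ("foo.resolver"))), and the
   resolver body filled in later returns a pointer to the version selected at
   load time.  */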
32068 static tree
32069 make_resolver_func (const tree default_decl,
32070 const tree dispatch_decl,
32071 basic_block *empty_bb)
32073 char *resolver_name;
32074 tree decl, type, decl_name, t;
32075 bool is_uniq = false;
32077 /* IFUNCs have to be globally visible. So, if the default_decl is
32078 not, then the name of the IFUNC should be made unique. */
32079 if (TREE_PUBLIC (default_decl) == 0)
32080 is_uniq = true;
32082 /* Append the filename to the resolver function if the versions are
32083 not externally visible. This is because the resolver function has
32084 to be externally visible for the loader to find it. So, appending
32085 the filename will prevent conflicts with a resolver function from
32086 another module which is based on the same version name. */
32087 resolver_name = make_name (default_decl, "resolver", is_uniq);
32089 /* The resolver function should return a (void *). */
32090 type = build_function_type_list (ptr_type_node, NULL_TREE);
32092 decl = build_fn_decl (resolver_name, type);
32093 decl_name = get_identifier (resolver_name);
32094 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32096 DECL_NAME (decl) = decl_name;
32097 TREE_USED (decl) = 1;
32098 DECL_ARTIFICIAL (decl) = 1;
32099 DECL_IGNORED_P (decl) = 0;
32100 /* IFUNC resolvers have to be externally visible. */
32101 TREE_PUBLIC (decl) = 1;
32102 DECL_UNINLINABLE (decl) = 1;
32104 /* Resolver is not external, body is generated. */
32105 DECL_EXTERNAL (decl) = 0;
32106 DECL_EXTERNAL (dispatch_decl) = 0;
32108 DECL_CONTEXT (decl) = NULL_TREE;
32109 DECL_INITIAL (decl) = make_node (BLOCK);
32110 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32112 if (DECL_COMDAT_GROUP (default_decl)
32113 || TREE_PUBLIC (default_decl))
32115 /* In this case, each translation unit with a call to this
32116 versioned function will put out a resolver. Ensure it
32117 is comdat to keep just one copy. */
32118 DECL_COMDAT (decl) = 1;
32119 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32121 /* Build result decl and add to function_decl. */
32122 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32123 DECL_ARTIFICIAL (t) = 1;
32124 DECL_IGNORED_P (t) = 1;
32125 DECL_RESULT (decl) = t;
32127 gimplify_function_tree (decl);
32128 push_cfun (DECL_STRUCT_FUNCTION (decl));
32129 *empty_bb = init_lowered_empty_function (decl, false);
32131 cgraph_add_new_function (decl, true);
32132 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32134 pop_cfun ();
32136 gcc_assert (dispatch_decl != NULL);
32137 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32138 DECL_ATTRIBUTES (dispatch_decl)
32139 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32141 /* Create the alias for dispatch to resolver here. */
32142 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32143 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32144 XDELETEVEC (resolver_name);
32145 return decl;
32148 /* Generate the dispatching code body to dispatch multi-versioned function
32149 DECL. The target hook is called to process the "target" attributes and
32150 provide the code to dispatch the right function at run-time. NODE points
32151 to the dispatcher decl whose body will be created. */
32153 static tree
32154 ix86_generate_version_dispatcher_body (void *node_p)
32156 tree resolver_decl;
32157 basic_block empty_bb;
32158 tree default_ver_decl;
32159 struct cgraph_node *versn;
32160 struct cgraph_node *node;
32162 struct cgraph_function_version_info *node_version_info = NULL;
32163 struct cgraph_function_version_info *versn_info = NULL;
32165 node = (cgraph_node *)node_p;
32167 node_version_info = get_cgraph_node_version (node);
32168 gcc_assert (node->dispatcher_function
32169 && node_version_info != NULL);
32171 if (node_version_info->dispatcher_resolver)
32172 return node_version_info->dispatcher_resolver;
32174 /* The first version in the chain corresponds to the default version. */
32175 default_ver_decl = node_version_info->next->this_node->decl;
32177 /* node is going to be an alias, so remove the finalized bit. */
32178 node->definition = false;
32180 resolver_decl = make_resolver_func (default_ver_decl,
32181 node->decl, &empty_bb);
32183 node_version_info->dispatcher_resolver = resolver_decl;
32185 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32187 auto_vec<tree, 2> fn_ver_vec;
32189 for (versn_info = node_version_info->next; versn_info;
32190 versn_info = versn_info->next)
32192 versn = versn_info->this_node;
32193 /* Check for virtual functions here again, as by this time it should
32194 have been determined if this function needs a vtable index or
32195 not. This happens for methods in derived classes that override
32196 virtual methods in base classes but are not explicitly marked as
32197 virtual. */
32198 if (DECL_VINDEX (versn->decl))
32199 sorry ("Virtual function multiversioning not supported");
32201 fn_ver_vec.safe_push (versn->decl);
32204 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32205 rebuild_cgraph_edges ();
32206 pop_cfun ();
32207 return resolver_decl;
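/* Illustrative sketch of the source-level multiversioning this dispatcher
   body supports (placeholder names, not part of the build):

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx2")))    int foo (void) { return 1; }

   A call to foo () is routed through the dispatcher whose body is built
   above, so the version is selected once, at load time, by the generated
   resolver.  */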
32209 /* This builds the processor_model struct type defined in
32210 libgcc/config/i386/cpuinfo.c */
32212 static tree
32213 build_processor_model_struct (void)
32215 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32216 "__cpu_features"};
32217 tree field = NULL_TREE, field_chain = NULL_TREE;
32218 int i;
32219 tree type = make_node (RECORD_TYPE);
32221 /* The first 3 fields are unsigned int. */
32222 for (i = 0; i < 3; ++i)
32224 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32225 get_identifier (field_name[i]), unsigned_type_node);
32226 if (field_chain != NULL_TREE)
32227 DECL_CHAIN (field) = field_chain;
32228 field_chain = field;
32231 /* The last field is an array of unsigned integers of size one. */
32232 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32233 get_identifier (field_name[3]),
32234 build_array_type (unsigned_type_node,
32235 build_index_type (size_one_node)));
32236 if (field_chain != NULL_TREE)
32237 DECL_CHAIN (field) = field_chain;
32238 field_chain = field;
32240 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32241 return type;
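/* For reference (assumed layout, to be kept in sync with the libgcc sources
   shipped with this tree): the record built above is meant to mirror

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   from libgcc/config/i386/cpuinfo.c.  */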
32244 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32246 static tree
32247 make_var_decl (tree type, const char *name)
32249 tree new_decl;
32251 new_decl = build_decl (UNKNOWN_LOCATION,
32252 VAR_DECL,
32253 get_identifier (name),
32254 type);
32256 DECL_EXTERNAL (new_decl) = 1;
32257 TREE_STATIC (new_decl) = 1;
32258 TREE_PUBLIC (new_decl) = 1;
32259 DECL_INITIAL (new_decl) = 0;
32260 DECL_ARTIFICIAL (new_decl) = 0;
32261 DECL_PRESERVE_P (new_decl) = 1;
32263 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32264 assemble_variable (new_decl, 0, 0, 0);
32266 return new_decl;
32269 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32270 into a check against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
32272 static tree
32273 fold_builtin_cpu (tree fndecl, tree *args)
32275 unsigned int i;
32276 enum ix86_builtins fn_code = (enum ix86_builtins)
32277 DECL_FUNCTION_CODE (fndecl);
32278 tree param_string_cst = NULL;
32280 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32281 enum processor_features
32283 F_CMOV = 0,
32284 F_MMX,
32285 F_POPCNT,
32286 F_SSE,
32287 F_SSE2,
32288 F_SSE3,
32289 F_SSSE3,
32290 F_SSE4_1,
32291 F_SSE4_2,
32292 F_AVX,
32293 F_AVX2,
32294 F_SSE4_A,
32295 F_FMA4,
32296 F_XOP,
32297 F_FMA,
32298 F_MAX
32301 /* These are the values for vendor types and cpu types and subtypes
32302 in cpuinfo.c. The corresponding start value must be subtracted
32303 from cpu types and subtypes before they are compared. */
32304 enum processor_model
32306 M_INTEL = 1,
32307 M_AMD,
32308 M_CPU_TYPE_START,
32309 M_INTEL_BONNELL,
32310 M_INTEL_CORE2,
32311 M_INTEL_COREI7,
32312 M_AMDFAM10H,
32313 M_AMDFAM15H,
32314 M_INTEL_SILVERMONT,
32315 M_AMD_BTVER1,
32316 M_AMD_BTVER2,
32317 M_CPU_SUBTYPE_START,
32318 M_INTEL_COREI7_NEHALEM,
32319 M_INTEL_COREI7_WESTMERE,
32320 M_INTEL_COREI7_SANDYBRIDGE,
32321 M_AMDFAM10H_BARCELONA,
32322 M_AMDFAM10H_SHANGHAI,
32323 M_AMDFAM10H_ISTANBUL,
32324 M_AMDFAM15H_BDVER1,
32325 M_AMDFAM15H_BDVER2,
32326 M_AMDFAM15H_BDVER3,
32327 M_AMDFAM15H_BDVER4,
32328 M_INTEL_COREI7_IVYBRIDGE,
32329 M_INTEL_COREI7_HASWELL
32332 static struct _arch_names_table
32334 const char *const name;
32335 const enum processor_model model;
32337 const arch_names_table[] =
32339 {"amd", M_AMD},
32340 {"intel", M_INTEL},
32341 {"atom", M_INTEL_BONNELL},
32342 {"slm", M_INTEL_SILVERMONT},
32343 {"core2", M_INTEL_CORE2},
32344 {"corei7", M_INTEL_COREI7},
32345 {"nehalem", M_INTEL_COREI7_NEHALEM},
32346 {"westmere", M_INTEL_COREI7_WESTMERE},
32347 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32348 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32349 {"haswell", M_INTEL_COREI7_HASWELL},
32350 {"bonnell", M_INTEL_BONNELL},
32351 {"silvermont", M_INTEL_SILVERMONT},
32352 {"amdfam10h", M_AMDFAM10H},
32353 {"barcelona", M_AMDFAM10H_BARCELONA},
32354 {"shanghai", M_AMDFAM10H_SHANGHAI},
32355 {"istanbul", M_AMDFAM10H_ISTANBUL},
32356 {"btver1", M_AMD_BTVER1},
32357 {"amdfam15h", M_AMDFAM15H},
32358 {"bdver1", M_AMDFAM15H_BDVER1},
32359 {"bdver2", M_AMDFAM15H_BDVER2},
32360 {"bdver3", M_AMDFAM15H_BDVER3},
32361 {"bdver4", M_AMDFAM15H_BDVER4},
32362 {"btver2", M_AMD_BTVER2},
32365 static struct _isa_names_table
32367 const char *const name;
32368 const enum processor_features feature;
32370 const isa_names_table[] =
32372 {"cmov", F_CMOV},
32373 {"mmx", F_MMX},
32374 {"popcnt", F_POPCNT},
32375 {"sse", F_SSE},
32376 {"sse2", F_SSE2},
32377 {"sse3", F_SSE3},
32378 {"ssse3", F_SSSE3},
32379 {"sse4a", F_SSE4_A},
32380 {"sse4.1", F_SSE4_1},
32381 {"sse4.2", F_SSE4_2},
32382 {"avx", F_AVX},
32383 {"fma4", F_FMA4},
32384 {"xop", F_XOP},
32385 {"fma", F_FMA},
32386 {"avx2", F_AVX2}
32389 tree __processor_model_type = build_processor_model_struct ();
32390 tree __cpu_model_var = make_var_decl (__processor_model_type,
32391 "__cpu_model");
32394 varpool_add_new_variable (__cpu_model_var);
32396 gcc_assert ((args != NULL) && (*args != NULL));
32398 param_string_cst = *args;
32399 while (param_string_cst
32400 && TREE_CODE (param_string_cst) != STRING_CST)
32402 /* *args must be an expr that can contain other EXPRs leading to a
32403 STRING_CST. */
32404 if (!EXPR_P (param_string_cst))
32406 error ("Parameter to builtin must be a string constant or literal");
32407 return integer_zero_node;
32409 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32412 gcc_assert (param_string_cst);
32414 if (fn_code == IX86_BUILTIN_CPU_IS)
32416 tree ref;
32417 tree field;
32418 tree final;
32420 unsigned int field_val = 0;
32421 unsigned int NUM_ARCH_NAMES
32422 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32424 for (i = 0; i < NUM_ARCH_NAMES; i++)
32425 if (strcmp (arch_names_table[i].name,
32426 TREE_STRING_POINTER (param_string_cst)) == 0)
32427 break;
32429 if (i == NUM_ARCH_NAMES)
32431 error ("Parameter to builtin not valid: %s",
32432 TREE_STRING_POINTER (param_string_cst));
32433 return integer_zero_node;
32436 field = TYPE_FIELDS (__processor_model_type);
32437 field_val = arch_names_table[i].model;
32439 /* CPU types are stored in the next field. */
32440 if (field_val > M_CPU_TYPE_START
32441 && field_val < M_CPU_SUBTYPE_START)
32443 field = DECL_CHAIN (field);
32444 field_val -= M_CPU_TYPE_START;
32447 /* CPU subtypes are stored in the next field. */
32448 if (field_val > M_CPU_SUBTYPE_START)
32450 field = DECL_CHAIN (DECL_CHAIN (field));
32451 field_val -= M_CPU_SUBTYPE_START;
32454 /* Get the appropriate field in __cpu_model. */
32455 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32456 field, NULL_TREE);
32458 /* Check the value. */
32459 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32460 build_int_cstu (unsigned_type_node, field_val));
32461 return build1 (CONVERT_EXPR, integer_type_node, final);
32463 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32465 tree ref;
32466 tree array_elt;
32467 tree field;
32468 tree final;
32470 unsigned int field_val = 0;
32471 unsigned int NUM_ISA_NAMES
32472 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32474 for (i = 0; i < NUM_ISA_NAMES; i++)
32475 if (strcmp (isa_names_table[i].name,
32476 TREE_STRING_POINTER (param_string_cst)) == 0)
32477 break;
32479 if (i == NUM_ISA_NAMES)
32481 error ("Parameter to builtin not valid: %s",
32482 TREE_STRING_POINTER (param_string_cst));
32483 return integer_zero_node;
32486 field = TYPE_FIELDS (__processor_model_type);
32487 /* Get the last field, which is __cpu_features. */
32488 while (DECL_CHAIN (field))
32489 field = DECL_CHAIN (field);
32491 /* Get the appropriate field: __cpu_model.__cpu_features */
32492 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32493 field, NULL_TREE);
32495 /* Access the 0th element of __cpu_features array. */
32496 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32497 integer_zero_node, NULL_TREE, NULL_TREE);
32499 field_val = (1 << isa_names_table[i].feature);
32500 /* Return __cpu_model.__cpu_features[0] & field_val */
32501 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32502 build_int_cstu (unsigned_type_node, field_val));
32503 return build1 (CONVERT_EXPR, integer_type_node, final);
32505 gcc_unreachable ();
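/* Illustrative examples of the folding performed above (using the enum
   values and the __cpu_model layout described earlier in this function):

     __builtin_cpu_is ("amd")         becomes
       __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_is ("corei7")      becomes
       __cpu_model.__cpu_type == M_INTEL_COREI7 - M_CPU_TYPE_START
     __builtin_cpu_supports ("sse2")  becomes
       __cpu_model.__cpu_features[0] & (1 << F_SSE2)

   each result converted to int to match the builtin's return type.  */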
32508 static tree
32509 ix86_fold_builtin (tree fndecl, int n_args,
32510 tree *args, bool ignore ATTRIBUTE_UNUSED)
32512 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32514 enum ix86_builtins fn_code = (enum ix86_builtins)
32515 DECL_FUNCTION_CODE (fndecl);
32516 if (fn_code == IX86_BUILTIN_CPU_IS
32517 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32519 gcc_assert (n_args == 1);
32520 return fold_builtin_cpu (fndecl, args);
32524 #ifdef SUBTARGET_FOLD_BUILTIN
32525 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32526 #endif
32528 return NULL_TREE;
32531 /* Make builtins to detect cpu type and features supported. NAME is
32532 the builtin name, CODE is the builtin code, and FTYPE is the function
32533 type of the builtin. */
32535 static void
32536 make_cpu_type_builtin (const char* name, int code,
32537 enum ix86_builtin_func_type ftype, bool is_const)
32539 tree decl;
32540 tree type;
32542 type = ix86_get_builtin_func_type (ftype);
32543 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32544 NULL, NULL_TREE);
32545 gcc_assert (decl != NULL_TREE);
32546 ix86_builtins[(int) code] = decl;
32547 TREE_READONLY (decl) = is_const;
32550 /* Make builtins to get CPU type and features supported. The created
32551 builtins are:
32553 __builtin_cpu_init (), to detect cpu type and features,
32554 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32555 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32558 static void
32559 ix86_init_platform_type_builtins (void)
32561 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32562 INT_FTYPE_VOID, false);
32563 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32564 INT_FTYPE_PCCHAR, true);
32565 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32566 INT_FTYPE_PCCHAR, true);
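/* Example of how the builtins registered above are used from user code
   (use_avx2_path and use_generic_path are placeholder names):

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else
       use_generic_path ();

   __builtin_cpu_init () fills in __cpu_model; libgcc normally runs it from
   a constructor, but code that executes before constructors should call it
   explicitly first.  */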
32569 /* Internal method for ix86_init_builtins. */
32571 static void
32572 ix86_init_builtins_va_builtins_abi (void)
32574 tree ms_va_ref, sysv_va_ref;
32575 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32576 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32577 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32578 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32580 if (!TARGET_64BIT)
32581 return;
32582 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32583 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32584 ms_va_ref = build_reference_type (ms_va_list_type_node);
32585 sysv_va_ref =
32586 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32588 fnvoid_va_end_ms =
32589 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32590 fnvoid_va_start_ms =
32591 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32592 fnvoid_va_end_sysv =
32593 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32594 fnvoid_va_start_sysv =
32595 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32596 NULL_TREE);
32597 fnvoid_va_copy_ms =
32598 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32599 NULL_TREE);
32600 fnvoid_va_copy_sysv =
32601 build_function_type_list (void_type_node, sysv_va_ref,
32602 sysv_va_ref, NULL_TREE);
32604 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32605 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32606 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32607 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32608 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32609 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32610 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32611 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32612 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32613 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32614 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32615 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
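/* Illustrative use of the cross-ABI varargs builtins registered above
   (placeholder function; 64-bit targets only, since the registration is
   skipped otherwise):

     void __attribute__ ((ms_abi))
     log_ms (const char *fmt, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, fmt);
       ... consume the arguments ...
       __builtin_ms_va_end (ap);
     }

   The __builtin_sysv_va_* builtins work the same way for sysv_abi
   functions.  */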
32618 static void
32619 ix86_init_builtin_types (void)
32621 tree float128_type_node, float80_type_node;
32623 /* The __float80 type. */
32624 float80_type_node = long_double_type_node;
32625 if (TYPE_MODE (float80_type_node) != XFmode)
32627 /* The __float80 type. */
32628 float80_type_node = make_node (REAL_TYPE);
32630 TYPE_PRECISION (float80_type_node) = 80;
32631 layout_type (float80_type_node);
32633 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32635 /* The __float128 type. */
32636 float128_type_node = make_node (REAL_TYPE);
32637 TYPE_PRECISION (float128_type_node) = 128;
32638 layout_type (float128_type_node);
32639 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32641 /* This macro is built by i386-builtin-types.awk. */
32642 DEFINE_BUILTIN_PRIMITIVE_TYPES;
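/* The types registered above are usable directly from user code, e.g.
   (illustrative only):

     __float80  ext  = 1.0L;
     __float128 quad = 1.0Q;

   As the code above shows, __float80 is simply long double when long double
   already has XFmode; otherwise a separate 80-bit REAL_TYPE is created.  */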
32645 static void
32646 ix86_init_builtins (void)
32648 tree t;
32650 ix86_init_builtin_types ();
32652 /* Builtins to get CPU type and features. */
32653 ix86_init_platform_type_builtins ();
32655 /* TFmode support builtins. */
32656 def_builtin_const (0, "__builtin_infq",
32657 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32658 def_builtin_const (0, "__builtin_huge_valq",
32659 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32661 /* We will expand them to a normal call if SSE isn't available, since
32662 they are used by libgcc. */
32663 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32664 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32665 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32666 TREE_READONLY (t) = 1;
32667 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32669 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32670 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32671 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32672 TREE_READONLY (t) = 1;
32673 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32675 ix86_init_tm_builtins ();
32676 ix86_init_mmx_sse_builtins ();
32678 if (TARGET_LP64)
32679 ix86_init_builtins_va_builtins_abi ();
32681 #ifdef SUBTARGET_INIT_BUILTINS
32682 SUBTARGET_INIT_BUILTINS;
32683 #endif
32686 /* Return the ix86 builtin for CODE. */
32688 static tree
32689 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32691 if (code >= IX86_BUILTIN_MAX)
32692 return error_mark_node;
32694 return ix86_builtins[code];
32697 /* Errors in the source file can cause expand_expr to return const0_rtx
32698 where we expect a vector. To avoid crashing, use one of the vector
32699 clear instructions. */
32700 static rtx
32701 safe_vector_operand (rtx x, enum machine_mode mode)
32703 if (x == const0_rtx)
32704 x = CONST0_RTX (mode);
32705 return x;
32708 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32710 static rtx
32711 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32713 rtx pat;
32714 tree arg0 = CALL_EXPR_ARG (exp, 0);
32715 tree arg1 = CALL_EXPR_ARG (exp, 1);
32716 rtx op0 = expand_normal (arg0);
32717 rtx op1 = expand_normal (arg1);
32718 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32719 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32720 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32722 if (VECTOR_MODE_P (mode0))
32723 op0 = safe_vector_operand (op0, mode0);
32724 if (VECTOR_MODE_P (mode1))
32725 op1 = safe_vector_operand (op1, mode1);
32727 if (optimize || !target
32728 || GET_MODE (target) != tmode
32729 || !insn_data[icode].operand[0].predicate (target, tmode))
32730 target = gen_reg_rtx (tmode);
32732 if (GET_MODE (op1) == SImode && mode1 == TImode)
32734 rtx x = gen_reg_rtx (V4SImode);
32735 emit_insn (gen_sse2_loadd (x, op1));
32736 op1 = gen_lowpart (TImode, x);
32739 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32740 op0 = copy_to_mode_reg (mode0, op0);
32741 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32742 op1 = copy_to_mode_reg (mode1, op1);
32744 pat = GEN_FCN (icode) (target, op0, op1);
32745 if (! pat)
32746 return 0;
32748 emit_insn (pat);
32750 return target;
32753 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32755 static rtx
32756 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32757 enum ix86_builtin_func_type m_type,
32758 enum rtx_code sub_code)
32760 rtx pat;
32761 int i;
32762 int nargs;
32763 bool comparison_p = false;
32764 bool tf_p = false;
32765 bool last_arg_constant = false;
32766 int num_memory = 0;
32767 struct {
32768 rtx op;
32769 enum machine_mode mode;
32770 } args[4];
32772 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32774 switch (m_type)
32776 case MULTI_ARG_4_DF2_DI_I:
32777 case MULTI_ARG_4_DF2_DI_I1:
32778 case MULTI_ARG_4_SF2_SI_I:
32779 case MULTI_ARG_4_SF2_SI_I1:
32780 nargs = 4;
32781 last_arg_constant = true;
32782 break;
32784 case MULTI_ARG_3_SF:
32785 case MULTI_ARG_3_DF:
32786 case MULTI_ARG_3_SF2:
32787 case MULTI_ARG_3_DF2:
32788 case MULTI_ARG_3_DI:
32789 case MULTI_ARG_3_SI:
32790 case MULTI_ARG_3_SI_DI:
32791 case MULTI_ARG_3_HI:
32792 case MULTI_ARG_3_HI_SI:
32793 case MULTI_ARG_3_QI:
32794 case MULTI_ARG_3_DI2:
32795 case MULTI_ARG_3_SI2:
32796 case MULTI_ARG_3_HI2:
32797 case MULTI_ARG_3_QI2:
32798 nargs = 3;
32799 break;
32801 case MULTI_ARG_2_SF:
32802 case MULTI_ARG_2_DF:
32803 case MULTI_ARG_2_DI:
32804 case MULTI_ARG_2_SI:
32805 case MULTI_ARG_2_HI:
32806 case MULTI_ARG_2_QI:
32807 nargs = 2;
32808 break;
32810 case MULTI_ARG_2_DI_IMM:
32811 case MULTI_ARG_2_SI_IMM:
32812 case MULTI_ARG_2_HI_IMM:
32813 case MULTI_ARG_2_QI_IMM:
32814 nargs = 2;
32815 last_arg_constant = true;
32816 break;
32818 case MULTI_ARG_1_SF:
32819 case MULTI_ARG_1_DF:
32820 case MULTI_ARG_1_SF2:
32821 case MULTI_ARG_1_DF2:
32822 case MULTI_ARG_1_DI:
32823 case MULTI_ARG_1_SI:
32824 case MULTI_ARG_1_HI:
32825 case MULTI_ARG_1_QI:
32826 case MULTI_ARG_1_SI_DI:
32827 case MULTI_ARG_1_HI_DI:
32828 case MULTI_ARG_1_HI_SI:
32829 case MULTI_ARG_1_QI_DI:
32830 case MULTI_ARG_1_QI_SI:
32831 case MULTI_ARG_1_QI_HI:
32832 nargs = 1;
32833 break;
32835 case MULTI_ARG_2_DI_CMP:
32836 case MULTI_ARG_2_SI_CMP:
32837 case MULTI_ARG_2_HI_CMP:
32838 case MULTI_ARG_2_QI_CMP:
32839 nargs = 2;
32840 comparison_p = true;
32841 break;
32843 case MULTI_ARG_2_SF_TF:
32844 case MULTI_ARG_2_DF_TF:
32845 case MULTI_ARG_2_DI_TF:
32846 case MULTI_ARG_2_SI_TF:
32847 case MULTI_ARG_2_HI_TF:
32848 case MULTI_ARG_2_QI_TF:
32849 nargs = 2;
32850 tf_p = true;
32851 break;
32853 default:
32854 gcc_unreachable ();
32857 if (optimize || !target
32858 || GET_MODE (target) != tmode
32859 || !insn_data[icode].operand[0].predicate (target, tmode))
32860 target = gen_reg_rtx (tmode);
32862 gcc_assert (nargs <= 4);
32864 for (i = 0; i < nargs; i++)
32866 tree arg = CALL_EXPR_ARG (exp, i);
32867 rtx op = expand_normal (arg);
32868 int adjust = (comparison_p) ? 1 : 0;
32869 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32871 if (last_arg_constant && i == nargs - 1)
32873 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32875 enum insn_code new_icode = icode;
32876 switch (icode)
32878 case CODE_FOR_xop_vpermil2v2df3:
32879 case CODE_FOR_xop_vpermil2v4sf3:
32880 case CODE_FOR_xop_vpermil2v4df3:
32881 case CODE_FOR_xop_vpermil2v8sf3:
32882 error ("the last argument must be a 2-bit immediate");
32883 return gen_reg_rtx (tmode);
32884 case CODE_FOR_xop_rotlv2di3:
32885 new_icode = CODE_FOR_rotlv2di3;
32886 goto xop_rotl;
32887 case CODE_FOR_xop_rotlv4si3:
32888 new_icode = CODE_FOR_rotlv4si3;
32889 goto xop_rotl;
32890 case CODE_FOR_xop_rotlv8hi3:
32891 new_icode = CODE_FOR_rotlv8hi3;
32892 goto xop_rotl;
32893 case CODE_FOR_xop_rotlv16qi3:
32894 new_icode = CODE_FOR_rotlv16qi3;
32895 xop_rotl:
32896 if (CONST_INT_P (op))
32898 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32899 op = GEN_INT (INTVAL (op) & mask);
32900 gcc_checking_assert
32901 (insn_data[icode].operand[i + 1].predicate (op, mode));
32903 else
32905 gcc_checking_assert
32906 (nargs == 2
32907 && insn_data[new_icode].operand[0].mode == tmode
32908 && insn_data[new_icode].operand[1].mode == tmode
32909 && insn_data[new_icode].operand[2].mode == mode
32910 && insn_data[new_icode].operand[0].predicate
32911 == insn_data[icode].operand[0].predicate
32912 && insn_data[new_icode].operand[1].predicate
32913 == insn_data[icode].operand[1].predicate);
32914 icode = new_icode;
32915 goto non_constant;
32917 break;
32918 default:
32919 gcc_unreachable ();
32923 else
32925 non_constant:
32926 if (VECTOR_MODE_P (mode))
32927 op = safe_vector_operand (op, mode);
32929 /* If we aren't optimizing, only allow one memory operand to be
32930 generated. */
32931 if (memory_operand (op, mode))
32932 num_memory++;
32934 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32936 if (optimize
32937 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32938 || num_memory > 1)
32939 op = force_reg (mode, op);
32942 args[i].op = op;
32943 args[i].mode = mode;
32946 switch (nargs)
32948 case 1:
32949 pat = GEN_FCN (icode) (target, args[0].op);
32950 break;
32952 case 2:
32953 if (tf_p)
32954 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32955 GEN_INT ((int)sub_code));
32956 else if (! comparison_p)
32957 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32958 else
32960 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32961 args[0].op,
32962 args[1].op);
32964 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32966 break;
32968 case 3:
32969 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32970 break;
32972 case 4:
32973 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32974 break;
32976 default:
32977 gcc_unreachable ();
32980 if (! pat)
32981 return 0;
32983 emit_insn (pat);
32984 return target;
32987 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32988 insns with vec_merge. */
32990 static rtx
32991 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32992 rtx target)
32994 rtx pat;
32995 tree arg0 = CALL_EXPR_ARG (exp, 0);
32996 rtx op1, op0 = expand_normal (arg0);
32997 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32998 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33000 if (optimize || !target
33001 || GET_MODE (target) != tmode
33002 || !insn_data[icode].operand[0].predicate (target, tmode))
33003 target = gen_reg_rtx (tmode);
33005 if (VECTOR_MODE_P (mode0))
33006 op0 = safe_vector_operand (op0, mode0);
33008 if ((optimize && !register_operand (op0, mode0))
33009 || !insn_data[icode].operand[1].predicate (op0, mode0))
33010 op0 = copy_to_mode_reg (mode0, op0);
33012 op1 = op0;
33013 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33014 op1 = copy_to_mode_reg (mode0, op1);
33016 pat = GEN_FCN (icode) (target, op0, op1);
33017 if (! pat)
33018 return 0;
33019 emit_insn (pat);
33020 return target;
33023 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33025 static rtx
33026 ix86_expand_sse_compare (const struct builtin_description *d,
33027 tree exp, rtx target, bool swap)
33029 rtx pat;
33030 tree arg0 = CALL_EXPR_ARG (exp, 0);
33031 tree arg1 = CALL_EXPR_ARG (exp, 1);
33032 rtx op0 = expand_normal (arg0);
33033 rtx op1 = expand_normal (arg1);
33034 rtx op2;
33035 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33036 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33037 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33038 enum rtx_code comparison = d->comparison;
33040 if (VECTOR_MODE_P (mode0))
33041 op0 = safe_vector_operand (op0, mode0);
33042 if (VECTOR_MODE_P (mode1))
33043 op1 = safe_vector_operand (op1, mode1);
33045 /* Swap operands if we have a comparison that isn't available in
33046 hardware. */
33047 if (swap)
33049 rtx tmp = gen_reg_rtx (mode1);
33050 emit_move_insn (tmp, op1);
33051 op1 = op0;
33052 op0 = tmp;
33055 if (optimize || !target
33056 || GET_MODE (target) != tmode
33057 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33058 target = gen_reg_rtx (tmode);
33060 if ((optimize && !register_operand (op0, mode0))
33061 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33062 op0 = copy_to_mode_reg (mode0, op0);
33063 if ((optimize && !register_operand (op1, mode1))
33064 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33065 op1 = copy_to_mode_reg (mode1, op1);
33067 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33068 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33069 if (! pat)
33070 return 0;
33071 emit_insn (pat);
33072 return target;
33075 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33077 static rtx
33078 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33079 rtx target)
33081 rtx pat;
33082 tree arg0 = CALL_EXPR_ARG (exp, 0);
33083 tree arg1 = CALL_EXPR_ARG (exp, 1);
33084 rtx op0 = expand_normal (arg0);
33085 rtx op1 = expand_normal (arg1);
33086 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33087 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33088 enum rtx_code comparison = d->comparison;
33090 if (VECTOR_MODE_P (mode0))
33091 op0 = safe_vector_operand (op0, mode0);
33092 if (VECTOR_MODE_P (mode1))
33093 op1 = safe_vector_operand (op1, mode1);
33095 /* Swap operands if we have a comparison that isn't available in
33096 hardware. */
33097 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33099 rtx tmp = op1;
33100 op1 = op0;
33101 op0 = tmp;
33104 target = gen_reg_rtx (SImode);
33105 emit_move_insn (target, const0_rtx);
33106 target = gen_rtx_SUBREG (QImode, target, 0);
33108 if ((optimize && !register_operand (op0, mode0))
33109 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33110 op0 = copy_to_mode_reg (mode0, op0);
33111 if ((optimize && !register_operand (op1, mode1))
33112 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33113 op1 = copy_to_mode_reg (mode1, op1);
33115 pat = GEN_FCN (d->icode) (op0, op1);
33116 if (! pat)
33117 return 0;
33118 emit_insn (pat);
33119 emit_insn (gen_rtx_SET (VOIDmode,
33120 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33121 gen_rtx_fmt_ee (comparison, QImode,
33122 SET_DEST (pat),
33123 const0_rtx)));
33125 return SUBREG_REG (target);
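/* For example, __builtin_ia32_comisdeq (used by _mm_comieq_sd) is expanded
   through this routine: the comi pattern sets the flags, and the sequence
   above zeroes an SImode register and writes its low byte with the requested
   condition, so the builtin yields 0 or 1.  (Builtin name given only for
   illustration; the exact set routed here comes from the builtin description
   tables elsewhere in this file.)  */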
33128 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33130 static rtx
33131 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33132 rtx target)
33134 rtx pat;
33135 tree arg0 = CALL_EXPR_ARG (exp, 0);
33136 rtx op1, op0 = expand_normal (arg0);
33137 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33138 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33140 if (optimize || target == 0
33141 || GET_MODE (target) != tmode
33142 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33143 target = gen_reg_rtx (tmode);
33145 if (VECTOR_MODE_P (mode0))
33146 op0 = safe_vector_operand (op0, mode0);
33148 if ((optimize && !register_operand (op0, mode0))
33149 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33150 op0 = copy_to_mode_reg (mode0, op0);
33152 op1 = GEN_INT (d->comparison);
33154 pat = GEN_FCN (d->icode) (target, op0, op1);
33155 if (! pat)
33156 return 0;
33157 emit_insn (pat);
33158 return target;
33161 static rtx
33162 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33163 tree exp, rtx target)
33165 rtx pat;
33166 tree arg0 = CALL_EXPR_ARG (exp, 0);
33167 tree arg1 = CALL_EXPR_ARG (exp, 1);
33168 rtx op0 = expand_normal (arg0);
33169 rtx op1 = expand_normal (arg1);
33170 rtx op2;
33171 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33172 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33173 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33175 if (optimize || target == 0
33176 || GET_MODE (target) != tmode
33177 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33178 target = gen_reg_rtx (tmode);
33180 op0 = safe_vector_operand (op0, mode0);
33181 op1 = safe_vector_operand (op1, mode1);
33183 if ((optimize && !register_operand (op0, mode0))
33184 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33185 op0 = copy_to_mode_reg (mode0, op0);
33186 if ((optimize && !register_operand (op1, mode1))
33187 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33188 op1 = copy_to_mode_reg (mode1, op1);
33190 op2 = GEN_INT (d->comparison);
33192 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33193 if (! pat)
33194 return 0;
33195 emit_insn (pat);
33196 return target;
33199 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33201 static rtx
33202 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33203 rtx target)
33205 rtx pat;
33206 tree arg0 = CALL_EXPR_ARG (exp, 0);
33207 tree arg1 = CALL_EXPR_ARG (exp, 1);
33208 rtx op0 = expand_normal (arg0);
33209 rtx op1 = expand_normal (arg1);
33210 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33211 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33212 enum rtx_code comparison = d->comparison;
33214 if (VECTOR_MODE_P (mode0))
33215 op0 = safe_vector_operand (op0, mode0);
33216 if (VECTOR_MODE_P (mode1))
33217 op1 = safe_vector_operand (op1, mode1);
33219 target = gen_reg_rtx (SImode);
33220 emit_move_insn (target, const0_rtx);
33221 target = gen_rtx_SUBREG (QImode, target, 0);
33223 if ((optimize && !register_operand (op0, mode0))
33224 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33225 op0 = copy_to_mode_reg (mode0, op0);
33226 if ((optimize && !register_operand (op1, mode1))
33227 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33228 op1 = copy_to_mode_reg (mode1, op1);
33230 pat = GEN_FCN (d->icode) (op0, op1);
33231 if (! pat)
33232 return 0;
33233 emit_insn (pat);
33234 emit_insn (gen_rtx_SET (VOIDmode,
33235 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33236 gen_rtx_fmt_ee (comparison, QImode,
33237 SET_DEST (pat),
33238 const0_rtx)));
33240 return SUBREG_REG (target);
33243 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33245 static rtx
33246 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33247 tree exp, rtx target)
33249 rtx pat;
33250 tree arg0 = CALL_EXPR_ARG (exp, 0);
33251 tree arg1 = CALL_EXPR_ARG (exp, 1);
33252 tree arg2 = CALL_EXPR_ARG (exp, 2);
33253 tree arg3 = CALL_EXPR_ARG (exp, 3);
33254 tree arg4 = CALL_EXPR_ARG (exp, 4);
33255 rtx scratch0, scratch1;
33256 rtx op0 = expand_normal (arg0);
33257 rtx op1 = expand_normal (arg1);
33258 rtx op2 = expand_normal (arg2);
33259 rtx op3 = expand_normal (arg3);
33260 rtx op4 = expand_normal (arg4);
33261 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33263 tmode0 = insn_data[d->icode].operand[0].mode;
33264 tmode1 = insn_data[d->icode].operand[1].mode;
33265 modev2 = insn_data[d->icode].operand[2].mode;
33266 modei3 = insn_data[d->icode].operand[3].mode;
33267 modev4 = insn_data[d->icode].operand[4].mode;
33268 modei5 = insn_data[d->icode].operand[5].mode;
33269 modeimm = insn_data[d->icode].operand[6].mode;
33271 if (VECTOR_MODE_P (modev2))
33272 op0 = safe_vector_operand (op0, modev2);
33273 if (VECTOR_MODE_P (modev4))
33274 op2 = safe_vector_operand (op2, modev4);
33276 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33277 op0 = copy_to_mode_reg (modev2, op0);
33278 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33279 op1 = copy_to_mode_reg (modei3, op1);
33280 if ((optimize && !register_operand (op2, modev4))
33281 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33282 op2 = copy_to_mode_reg (modev4, op2);
33283 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33284 op3 = copy_to_mode_reg (modei5, op3);
33286 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33288 error ("the fifth argument must be an 8-bit immediate");
33289 return const0_rtx;
33292 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33294 if (optimize || !target
33295 || GET_MODE (target) != tmode0
33296 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33297 target = gen_reg_rtx (tmode0);
33299 scratch1 = gen_reg_rtx (tmode1);
33301 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33303 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33305 if (optimize || !target
33306 || GET_MODE (target) != tmode1
33307 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33308 target = gen_reg_rtx (tmode1);
33310 scratch0 = gen_reg_rtx (tmode0);
33312 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33314 else
33316 gcc_assert (d->flag);
33318 scratch0 = gen_reg_rtx (tmode0);
33319 scratch1 = gen_reg_rtx (tmode1);
33321 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33324 if (! pat)
33325 return 0;
33327 emit_insn (pat);
33329 if (d->flag)
33331 target = gen_reg_rtx (SImode);
33332 emit_move_insn (target, const0_rtx);
33333 target = gen_rtx_SUBREG (QImode, target, 0);
33335 emit_insn
33336 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33337 gen_rtx_fmt_ee (EQ, QImode,
33338 gen_rtx_REG ((enum machine_mode) d->flag,
33339 FLAGS_REG),
33340 const0_rtx)));
33341 return SUBREG_REG (target);
33343 else
33344 return target;
33348 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33350 static rtx
33351 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33352 tree exp, rtx target)
33354 rtx pat;
33355 tree arg0 = CALL_EXPR_ARG (exp, 0);
33356 tree arg1 = CALL_EXPR_ARG (exp, 1);
33357 tree arg2 = CALL_EXPR_ARG (exp, 2);
33358 rtx scratch0, scratch1;
33359 rtx op0 = expand_normal (arg0);
33360 rtx op1 = expand_normal (arg1);
33361 rtx op2 = expand_normal (arg2);
33362 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33364 tmode0 = insn_data[d->icode].operand[0].mode;
33365 tmode1 = insn_data[d->icode].operand[1].mode;
33366 modev2 = insn_data[d->icode].operand[2].mode;
33367 modev3 = insn_data[d->icode].operand[3].mode;
33368 modeimm = insn_data[d->icode].operand[4].mode;
33370 if (VECTOR_MODE_P (modev2))
33371 op0 = safe_vector_operand (op0, modev2);
33372 if (VECTOR_MODE_P (modev3))
33373 op1 = safe_vector_operand (op1, modev3);
33375 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33376 op0 = copy_to_mode_reg (modev2, op0);
33377 if ((optimize && !register_operand (op1, modev3))
33378 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33379 op1 = copy_to_mode_reg (modev3, op1);
33381 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33383 error ("the third argument must be an 8-bit immediate");
33384 return const0_rtx;
33387 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33389 if (optimize || !target
33390 || GET_MODE (target) != tmode0
33391 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33392 target = gen_reg_rtx (tmode0);
33394 scratch1 = gen_reg_rtx (tmode1);
33396 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33398 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33400 if (optimize || !target
33401 || GET_MODE (target) != tmode1
33402 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33403 target = gen_reg_rtx (tmode1);
33405 scratch0 = gen_reg_rtx (tmode0);
33407 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33409 else
33411 gcc_assert (d->flag);
33413 scratch0 = gen_reg_rtx (tmode0);
33414 scratch1 = gen_reg_rtx (tmode1);
33416 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33419 if (! pat)
33420 return 0;
33422 emit_insn (pat);
33424 if (d->flag)
33426 target = gen_reg_rtx (SImode);
33427 emit_move_insn (target, const0_rtx);
33428 target = gen_rtx_SUBREG (QImode, target, 0);
33430 emit_insn
33431 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33432 gen_rtx_fmt_ee (EQ, QImode,
33433 gen_rtx_REG ((enum machine_mode) d->flag,
33434 FLAGS_REG),
33435 const0_rtx)));
33436 return SUBREG_REG (target);
33438 else
33439 return target;
33442 /* Subroutine of ix86_expand_builtin to take care of insns with
33443 variable number of operands. */
33445 static rtx
33446 ix86_expand_args_builtin (const struct builtin_description *d,
33447 tree exp, rtx target)
33449 rtx pat, real_target;
33450 unsigned int i, nargs;
33451 unsigned int nargs_constant = 0;
33452 unsigned int mask_pos = 0;
33453 int num_memory = 0;
33454 struct
33456 rtx op;
33457 enum machine_mode mode;
33458 } args[6];
33459 bool last_arg_count = false;
33460 enum insn_code icode = d->icode;
33461 const struct insn_data_d *insn_p = &insn_data[icode];
33462 enum machine_mode tmode = insn_p->operand[0].mode;
33463 enum machine_mode rmode = VOIDmode;
33464 bool swap = false;
33465 enum rtx_code comparison = d->comparison;
33467 switch ((enum ix86_builtin_func_type) d->flag)
33469 case V2DF_FTYPE_V2DF_ROUND:
33470 case V4DF_FTYPE_V4DF_ROUND:
33471 case V4SF_FTYPE_V4SF_ROUND:
33472 case V8SF_FTYPE_V8SF_ROUND:
33473 case V4SI_FTYPE_V4SF_ROUND:
33474 case V8SI_FTYPE_V8SF_ROUND:
33475 return ix86_expand_sse_round (d, exp, target);
33476 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33477 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33478 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33479 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33480 case INT_FTYPE_V8SF_V8SF_PTEST:
33481 case INT_FTYPE_V4DI_V4DI_PTEST:
33482 case INT_FTYPE_V4DF_V4DF_PTEST:
33483 case INT_FTYPE_V4SF_V4SF_PTEST:
33484 case INT_FTYPE_V2DI_V2DI_PTEST:
33485 case INT_FTYPE_V2DF_V2DF_PTEST:
33486 return ix86_expand_sse_ptest (d, exp, target);
33487 case FLOAT128_FTYPE_FLOAT128:
33488 case FLOAT_FTYPE_FLOAT:
33489 case INT_FTYPE_INT:
33490 case UINT64_FTYPE_INT:
33491 case UINT16_FTYPE_UINT16:
33492 case INT64_FTYPE_INT64:
33493 case INT64_FTYPE_V4SF:
33494 case INT64_FTYPE_V2DF:
33495 case INT_FTYPE_V16QI:
33496 case INT_FTYPE_V8QI:
33497 case INT_FTYPE_V8SF:
33498 case INT_FTYPE_V4DF:
33499 case INT_FTYPE_V4SF:
33500 case INT_FTYPE_V2DF:
33501 case INT_FTYPE_V32QI:
33502 case V16QI_FTYPE_V16QI:
33503 case V8SI_FTYPE_V8SF:
33504 case V8SI_FTYPE_V4SI:
33505 case V8HI_FTYPE_V8HI:
33506 case V8HI_FTYPE_V16QI:
33507 case V8QI_FTYPE_V8QI:
33508 case V8SF_FTYPE_V8SF:
33509 case V8SF_FTYPE_V8SI:
33510 case V8SF_FTYPE_V4SF:
33511 case V8SF_FTYPE_V8HI:
33512 case V4SI_FTYPE_V4SI:
33513 case V4SI_FTYPE_V16QI:
33514 case V4SI_FTYPE_V4SF:
33515 case V4SI_FTYPE_V8SI:
33516 case V4SI_FTYPE_V8HI:
33517 case V4SI_FTYPE_V4DF:
33518 case V4SI_FTYPE_V2DF:
33519 case V4HI_FTYPE_V4HI:
33520 case V4DF_FTYPE_V4DF:
33521 case V4DF_FTYPE_V4SI:
33522 case V4DF_FTYPE_V4SF:
33523 case V4DF_FTYPE_V2DF:
33524 case V4SF_FTYPE_V4SF:
33525 case V4SF_FTYPE_V4SI:
33526 case V4SF_FTYPE_V8SF:
33527 case V4SF_FTYPE_V4DF:
33528 case V4SF_FTYPE_V8HI:
33529 case V4SF_FTYPE_V2DF:
33530 case V2DI_FTYPE_V2DI:
33531 case V2DI_FTYPE_V16QI:
33532 case V2DI_FTYPE_V8HI:
33533 case V2DI_FTYPE_V4SI:
33534 case V2DF_FTYPE_V2DF:
33535 case V2DF_FTYPE_V4SI:
33536 case V2DF_FTYPE_V4DF:
33537 case V2DF_FTYPE_V4SF:
33538 case V2DF_FTYPE_V2SI:
33539 case V2SI_FTYPE_V2SI:
33540 case V2SI_FTYPE_V4SF:
33541 case V2SI_FTYPE_V2SF:
33542 case V2SI_FTYPE_V2DF:
33543 case V2SF_FTYPE_V2SF:
33544 case V2SF_FTYPE_V2SI:
33545 case V32QI_FTYPE_V32QI:
33546 case V32QI_FTYPE_V16QI:
33547 case V16HI_FTYPE_V16HI:
33548 case V16HI_FTYPE_V8HI:
33549 case V8SI_FTYPE_V8SI:
33550 case V16HI_FTYPE_V16QI:
33551 case V8SI_FTYPE_V16QI:
33552 case V4DI_FTYPE_V16QI:
33553 case V8SI_FTYPE_V8HI:
33554 case V4DI_FTYPE_V8HI:
33555 case V4DI_FTYPE_V4SI:
33556 case V4DI_FTYPE_V2DI:
33557 case HI_FTYPE_HI:
33558 case UINT_FTYPE_V2DF:
33559 case UINT_FTYPE_V4SF:
33560 case UINT64_FTYPE_V2DF:
33561 case UINT64_FTYPE_V4SF:
33562 case V16QI_FTYPE_V8DI:
33563 case V16HI_FTYPE_V16SI:
33564 case V16SI_FTYPE_HI:
33565 case V16SI_FTYPE_V16SI:
33566 case V16SI_FTYPE_INT:
33567 case V16SF_FTYPE_FLOAT:
33568 case V16SF_FTYPE_V4SF:
33569 case V16SF_FTYPE_V16SF:
33570 case V8HI_FTYPE_V8DI:
33571 case V8UHI_FTYPE_V8UHI:
33572 case V8SI_FTYPE_V8DI:
33573 case V8USI_FTYPE_V8USI:
33574 case V8SF_FTYPE_V8DF:
33575 case V8DI_FTYPE_QI:
33576 case V8DI_FTYPE_INT64:
33577 case V8DI_FTYPE_V4DI:
33578 case V8DI_FTYPE_V8DI:
33579 case V8DF_FTYPE_DOUBLE:
33580 case V8DF_FTYPE_V4DF:
33581 case V8DF_FTYPE_V8DF:
33582 case V8DF_FTYPE_V8SI:
33583 nargs = 1;
33584 break;
33585 case V4SF_FTYPE_V4SF_VEC_MERGE:
33586 case V2DF_FTYPE_V2DF_VEC_MERGE:
33587 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33588 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33589 case V16QI_FTYPE_V16QI_V16QI:
33590 case V16QI_FTYPE_V8HI_V8HI:
33591 case V16SI_FTYPE_V16SI_V16SI:
33592 case V16SF_FTYPE_V16SF_V16SF:
33593 case V16SF_FTYPE_V16SF_V16SI:
33594 case V8QI_FTYPE_V8QI_V8QI:
33595 case V8QI_FTYPE_V4HI_V4HI:
33596 case V8HI_FTYPE_V8HI_V8HI:
33597 case V8HI_FTYPE_V16QI_V16QI:
33598 case V8HI_FTYPE_V4SI_V4SI:
33599 case V8SF_FTYPE_V8SF_V8SF:
33600 case V8SF_FTYPE_V8SF_V8SI:
33601 case V8DI_FTYPE_V8DI_V8DI:
33602 case V8DF_FTYPE_V8DF_V8DF:
33603 case V8DF_FTYPE_V8DF_V8DI:
33604 case V4SI_FTYPE_V4SI_V4SI:
33605 case V4SI_FTYPE_V8HI_V8HI:
33606 case V4SI_FTYPE_V4SF_V4SF:
33607 case V4SI_FTYPE_V2DF_V2DF:
33608 case V4HI_FTYPE_V4HI_V4HI:
33609 case V4HI_FTYPE_V8QI_V8QI:
33610 case V4HI_FTYPE_V2SI_V2SI:
33611 case V4DF_FTYPE_V4DF_V4DF:
33612 case V4DF_FTYPE_V4DF_V4DI:
33613 case V4SF_FTYPE_V4SF_V4SF:
33614 case V4SF_FTYPE_V4SF_V4SI:
33615 case V4SF_FTYPE_V4SF_V2SI:
33616 case V4SF_FTYPE_V4SF_V2DF:
33617 case V4SF_FTYPE_V4SF_UINT:
33618 case V4SF_FTYPE_V4SF_UINT64:
33619 case V4SF_FTYPE_V4SF_DI:
33620 case V4SF_FTYPE_V4SF_SI:
33621 case V2DI_FTYPE_V2DI_V2DI:
33622 case V2DI_FTYPE_V16QI_V16QI:
33623 case V2DI_FTYPE_V4SI_V4SI:
33624 case V2UDI_FTYPE_V4USI_V4USI:
33625 case V2DI_FTYPE_V2DI_V16QI:
33626 case V2DI_FTYPE_V2DF_V2DF:
33627 case V2SI_FTYPE_V2SI_V2SI:
33628 case V2SI_FTYPE_V4HI_V4HI:
33629 case V2SI_FTYPE_V2SF_V2SF:
33630 case V2DF_FTYPE_V2DF_V2DF:
33631 case V2DF_FTYPE_V2DF_V4SF:
33632 case V2DF_FTYPE_V2DF_V2DI:
33633 case V2DF_FTYPE_V2DF_DI:
33634 case V2DF_FTYPE_V2DF_SI:
33635 case V2DF_FTYPE_V2DF_UINT:
33636 case V2DF_FTYPE_V2DF_UINT64:
33637 case V2SF_FTYPE_V2SF_V2SF:
33638 case V1DI_FTYPE_V1DI_V1DI:
33639 case V1DI_FTYPE_V8QI_V8QI:
33640 case V1DI_FTYPE_V2SI_V2SI:
33641 case V32QI_FTYPE_V16HI_V16HI:
33642 case V16HI_FTYPE_V8SI_V8SI:
33643 case V32QI_FTYPE_V32QI_V32QI:
33644 case V16HI_FTYPE_V32QI_V32QI:
33645 case V16HI_FTYPE_V16HI_V16HI:
33646 case V8SI_FTYPE_V4DF_V4DF:
33647 case V8SI_FTYPE_V8SI_V8SI:
33648 case V8SI_FTYPE_V16HI_V16HI:
33649 case V4DI_FTYPE_V4DI_V4DI:
33650 case V4DI_FTYPE_V8SI_V8SI:
33651 case V4UDI_FTYPE_V8USI_V8USI:
33652 case QI_FTYPE_V8DI_V8DI:
33653 case HI_FTYPE_V16SI_V16SI:
33654 if (comparison == UNKNOWN)
33655 return ix86_expand_binop_builtin (icode, exp, target);
33656 nargs = 2;
33657 break;
33658 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33659 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33660 gcc_assert (comparison != UNKNOWN);
33661 nargs = 2;
33662 swap = true;
33663 break;
33664 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33665 case V16HI_FTYPE_V16HI_SI_COUNT:
33666 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33667 case V8SI_FTYPE_V8SI_SI_COUNT:
33668 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33669 case V4DI_FTYPE_V4DI_INT_COUNT:
33670 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33671 case V8HI_FTYPE_V8HI_SI_COUNT:
33672 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33673 case V4SI_FTYPE_V4SI_SI_COUNT:
33674 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33675 case V4HI_FTYPE_V4HI_SI_COUNT:
33676 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33677 case V2DI_FTYPE_V2DI_SI_COUNT:
33678 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33679 case V2SI_FTYPE_V2SI_SI_COUNT:
33680 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33681 case V1DI_FTYPE_V1DI_SI_COUNT:
33682 nargs = 2;
33683 last_arg_count = true;
33684 break;
33685 case UINT64_FTYPE_UINT64_UINT64:
33686 case UINT_FTYPE_UINT_UINT:
33687 case UINT_FTYPE_UINT_USHORT:
33688 case UINT_FTYPE_UINT_UCHAR:
33689 case UINT16_FTYPE_UINT16_INT:
33690 case UINT8_FTYPE_UINT8_INT:
33691 case HI_FTYPE_HI_HI:
33692 case V16SI_FTYPE_V8DF_V8DF:
33693 nargs = 2;
33694 break;
33695 case V2DI_FTYPE_V2DI_INT_CONVERT:
33696 nargs = 2;
33697 rmode = V1TImode;
33698 nargs_constant = 1;
33699 break;
33700 case V4DI_FTYPE_V4DI_INT_CONVERT:
33701 nargs = 2;
33702 rmode = V2TImode;
33703 nargs_constant = 1;
33704 break;
33705 case V8HI_FTYPE_V8HI_INT:
33706 case V8HI_FTYPE_V8SF_INT:
33707 case V16HI_FTYPE_V16SF_INT:
33708 case V8HI_FTYPE_V4SF_INT:
33709 case V8SF_FTYPE_V8SF_INT:
33710 case V4SF_FTYPE_V16SF_INT:
33711 case V16SF_FTYPE_V16SF_INT:
33712 case V4SI_FTYPE_V4SI_INT:
33713 case V4SI_FTYPE_V8SI_INT:
33714 case V4HI_FTYPE_V4HI_INT:
33715 case V4DF_FTYPE_V4DF_INT:
33716 case V4DF_FTYPE_V8DF_INT:
33717 case V4SF_FTYPE_V4SF_INT:
33718 case V4SF_FTYPE_V8SF_INT:
33719 case V2DI_FTYPE_V2DI_INT:
33720 case V2DF_FTYPE_V2DF_INT:
33721 case V2DF_FTYPE_V4DF_INT:
33722 case V16HI_FTYPE_V16HI_INT:
33723 case V8SI_FTYPE_V8SI_INT:
33724 case V16SI_FTYPE_V16SI_INT:
33725 case V4SI_FTYPE_V16SI_INT:
33726 case V4DI_FTYPE_V4DI_INT:
33727 case V2DI_FTYPE_V4DI_INT:
33728 case V4DI_FTYPE_V8DI_INT:
33729 case HI_FTYPE_HI_INT:
33730 nargs = 2;
33731 nargs_constant = 1;
33732 break;
33733 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33734 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33735 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33736 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33737 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33738 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33739 case HI_FTYPE_V16SI_V16SI_HI:
33740 case QI_FTYPE_V8DI_V8DI_QI:
33741 case V16HI_FTYPE_V16SI_V16HI_HI:
33742 case V16QI_FTYPE_V16SI_V16QI_HI:
33743 case V16QI_FTYPE_V8DI_V16QI_QI:
33744 case V16SF_FTYPE_V16SF_V16SF_HI:
33745 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33746 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33747 case V16SF_FTYPE_V16SI_V16SF_HI:
33748 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33749 case V16SF_FTYPE_V4SF_V16SF_HI:
33750 case V16SI_FTYPE_SI_V16SI_HI:
33751 case V16SI_FTYPE_V16HI_V16SI_HI:
33752 case V16SI_FTYPE_V16QI_V16SI_HI:
33753 case V16SI_FTYPE_V16SF_V16SI_HI:
33754 case V16SI_FTYPE_V16SI_V16SI_HI:
33755 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33756 case V16SI_FTYPE_V4SI_V16SI_HI:
33757 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33758 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33759 case V8DF_FTYPE_V2DF_V8DF_QI:
33760 case V8DF_FTYPE_V4DF_V8DF_QI:
33761 case V8DF_FTYPE_V8DF_V8DF_QI:
33762 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33763 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33764 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33765 case V8DF_FTYPE_V8SF_V8DF_QI:
33766 case V8DF_FTYPE_V8SI_V8DF_QI:
33767 case V8DI_FTYPE_DI_V8DI_QI:
33768 case V8DI_FTYPE_V16QI_V8DI_QI:
33769 case V8DI_FTYPE_V2DI_V8DI_QI:
33770 case V8DI_FTYPE_V4DI_V8DI_QI:
33771 case V8DI_FTYPE_V8DI_V8DI_QI:
33772 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33773 case V8DI_FTYPE_V8HI_V8DI_QI:
33774 case V8DI_FTYPE_V8SI_V8DI_QI:
33775 case V8HI_FTYPE_V8DI_V8HI_QI:
33776 case V8SF_FTYPE_V8DF_V8SF_QI:
33777 case V8SI_FTYPE_V8DF_V8SI_QI:
33778 case V8SI_FTYPE_V8DI_V8SI_QI:
33779 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33780 nargs = 3;
33781 break;
33782 case V32QI_FTYPE_V32QI_V32QI_INT:
33783 case V16HI_FTYPE_V16HI_V16HI_INT:
33784 case V16QI_FTYPE_V16QI_V16QI_INT:
33785 case V4DI_FTYPE_V4DI_V4DI_INT:
33786 case V8HI_FTYPE_V8HI_V8HI_INT:
33787 case V8SI_FTYPE_V8SI_V8SI_INT:
33788 case V8SI_FTYPE_V8SI_V4SI_INT:
33789 case V8SF_FTYPE_V8SF_V8SF_INT:
33790 case V8SF_FTYPE_V8SF_V4SF_INT:
33791 case V4SI_FTYPE_V4SI_V4SI_INT:
33792 case V4DF_FTYPE_V4DF_V4DF_INT:
33793 case V16SF_FTYPE_V16SF_V16SF_INT:
33794 case V16SF_FTYPE_V16SF_V4SF_INT:
33795 case V16SI_FTYPE_V16SI_V4SI_INT:
33796 case V4DF_FTYPE_V4DF_V2DF_INT:
33797 case V4SF_FTYPE_V4SF_V4SF_INT:
33798 case V2DI_FTYPE_V2DI_V2DI_INT:
33799 case V4DI_FTYPE_V4DI_V2DI_INT:
33800 case V2DF_FTYPE_V2DF_V2DF_INT:
33801 case QI_FTYPE_V8DI_V8DI_INT:
33802 case QI_FTYPE_V8DF_V8DF_INT:
33803 case QI_FTYPE_V2DF_V2DF_INT:
33804 case QI_FTYPE_V4SF_V4SF_INT:
33805 case HI_FTYPE_V16SI_V16SI_INT:
33806 case HI_FTYPE_V16SF_V16SF_INT:
33807 nargs = 3;
33808 nargs_constant = 1;
33809 break;
33810 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33811 nargs = 3;
33812 rmode = V4DImode;
33813 nargs_constant = 1;
33814 break;
33815 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33816 nargs = 3;
33817 rmode = V2DImode;
33818 nargs_constant = 1;
33819 break;
33820 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33821 nargs = 3;
33822 rmode = DImode;
33823 nargs_constant = 1;
33824 break;
33825 case V2DI_FTYPE_V2DI_UINT_UINT:
33826 nargs = 3;
33827 nargs_constant = 2;
33828 break;
33829 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33830 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33831 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33832 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33833 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33834 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33835 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33836 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33837 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33838 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33839 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33840 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33841 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33842 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33843 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33844 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33845 nargs = 4;
33846 break;
33847 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33848 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33849 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33850 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33851 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33852 nargs = 4;
33853 nargs_constant = 1;
33854 break;
33855 case QI_FTYPE_V2DF_V2DF_INT_QI:
33856 case QI_FTYPE_V4SF_V4SF_INT_QI:
33857 nargs = 4;
33858 mask_pos = 1;
33859 nargs_constant = 1;
33860 break;
33861 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33862 nargs = 4;
33863 nargs_constant = 2;
33864 break;
33865 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33866 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33867 nargs = 4;
33868 break;
33869 case QI_FTYPE_V8DI_V8DI_INT_QI:
33870 case HI_FTYPE_V16SI_V16SI_INT_HI:
33871 case QI_FTYPE_V8DF_V8DF_INT_QI:
33872 case HI_FTYPE_V16SF_V16SF_INT_HI:
33873 mask_pos = 1;
33874 nargs = 4;
33875 nargs_constant = 1;
33876 break;
33877 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33878 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33879 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33880 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33881 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33882 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33883 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33884 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33885 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33886 nargs = 4;
33887 mask_pos = 2;
33888 nargs_constant = 1;
33889 break;
33890 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33891 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33892 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33893 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33894 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33895 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33896 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33897 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33898 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33899 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33900 nargs = 5;
33901 mask_pos = 2;
33902 nargs_constant = 1;
33903 break;
33904 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33905 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33906 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33907 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33908 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33909 nargs = 5;
33910 mask_pos = 1;
33911 nargs_constant = 1;
33912 break;
33914 default:
33915 gcc_unreachable ();
33918 gcc_assert (nargs <= ARRAY_SIZE (args));
33920 if (comparison != UNKNOWN)
33922 gcc_assert (nargs == 2);
33923 return ix86_expand_sse_compare (d, exp, target, swap);
33926 if (rmode == VOIDmode || rmode == tmode)
33928 if (optimize
33929 || target == 0
33930 || GET_MODE (target) != tmode
33931 || !insn_p->operand[0].predicate (target, tmode))
33932 target = gen_reg_rtx (tmode);
33933 real_target = target;
33935 else
33937 real_target = gen_reg_rtx (tmode);
33938 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33941 for (i = 0; i < nargs; i++)
33943 tree arg = CALL_EXPR_ARG (exp, i);
33944 rtx op = expand_normal (arg);
33945 enum machine_mode mode = insn_p->operand[i + 1].mode;
33946 bool match = insn_p->operand[i + 1].predicate (op, mode);
33948 if (last_arg_count && (i + 1) == nargs)
33950 /* SIMD shift insns take either an 8-bit immediate or a
33951 register as count, but the builtin functions take an int.
33952 If the count doesn't match, we put it in a register. */
33953 if (!match)
33955 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33956 if (!insn_p->operand[i + 1].predicate (op, mode))
33957 op = copy_to_reg (op);
33960 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33961 (!mask_pos && (nargs - i) <= nargs_constant))
33963 if (!match)
33964 switch (icode)
33966 case CODE_FOR_avx2_inserti128:
33967 case CODE_FOR_avx2_extracti128:
33968 error ("the last argument must be a 1-bit immediate");
33969 return const0_rtx;
33971 case CODE_FOR_avx512f_cmpv8di3_mask:
33972 case CODE_FOR_avx512f_cmpv16si3_mask:
33973 case CODE_FOR_avx512f_ucmpv8di3_mask:
33974 case CODE_FOR_avx512f_ucmpv16si3_mask:
33975 error ("the last argument must be a 3-bit immediate");
33976 return const0_rtx;
33978 case CODE_FOR_sse4_1_roundsd:
33979 case CODE_FOR_sse4_1_roundss:
33981 case CODE_FOR_sse4_1_roundpd:
33982 case CODE_FOR_sse4_1_roundps:
33983 case CODE_FOR_avx_roundpd256:
33984 case CODE_FOR_avx_roundps256:
33986 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33987 case CODE_FOR_sse4_1_roundps_sfix:
33988 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33989 case CODE_FOR_avx_roundps_sfix256:
33991 case CODE_FOR_sse4_1_blendps:
33992 case CODE_FOR_avx_blendpd256:
33993 case CODE_FOR_avx_vpermilv4df:
33994 case CODE_FOR_avx512f_getmantv8df_mask:
33995 case CODE_FOR_avx512f_getmantv16sf_mask:
33996 error ("the last argument must be a 4-bit immediate");
33997 return const0_rtx;
33999 case CODE_FOR_sha1rnds4:
34000 case CODE_FOR_sse4_1_blendpd:
34001 case CODE_FOR_avx_vpermilv2df:
34002 case CODE_FOR_xop_vpermil2v2df3:
34003 case CODE_FOR_xop_vpermil2v4sf3:
34004 case CODE_FOR_xop_vpermil2v4df3:
34005 case CODE_FOR_xop_vpermil2v8sf3:
34006 case CODE_FOR_avx512f_vinsertf32x4_mask:
34007 case CODE_FOR_avx512f_vinserti32x4_mask:
34008 case CODE_FOR_avx512f_vextractf32x4_mask:
34009 case CODE_FOR_avx512f_vextracti32x4_mask:
34010 error ("the last argument must be a 2-bit immediate");
34011 return const0_rtx;
34013 case CODE_FOR_avx_vextractf128v4df:
34014 case CODE_FOR_avx_vextractf128v8sf:
34015 case CODE_FOR_avx_vextractf128v8si:
34016 case CODE_FOR_avx_vinsertf128v4df:
34017 case CODE_FOR_avx_vinsertf128v8sf:
34018 case CODE_FOR_avx_vinsertf128v8si:
34019 case CODE_FOR_avx512f_vinsertf64x4_mask:
34020 case CODE_FOR_avx512f_vinserti64x4_mask:
34021 case CODE_FOR_avx512f_vextractf64x4_mask:
34022 case CODE_FOR_avx512f_vextracti64x4_mask:
34023 error ("the last argument must be a 1-bit immediate");
34024 return const0_rtx;
34026 case CODE_FOR_avx_vmcmpv2df3:
34027 case CODE_FOR_avx_vmcmpv4sf3:
34028 case CODE_FOR_avx_cmpv2df3:
34029 case CODE_FOR_avx_cmpv4sf3:
34030 case CODE_FOR_avx_cmpv4df3:
34031 case CODE_FOR_avx_cmpv8sf3:
34032 case CODE_FOR_avx512f_cmpv8df3_mask:
34033 case CODE_FOR_avx512f_cmpv16sf3_mask:
34034 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34035 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34036 error ("the last argument must be a 5-bit immediate");
34037 return const0_rtx;
34039 default:
34040 switch (nargs_constant)
34042 case 2:
34043 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34044 (!mask_pos && (nargs - i) == nargs_constant))
34046 error ("the next to last argument must be an 8-bit immediate");
34047 break;
34049 case 1:
34050 error ("the last argument must be an 8-bit immediate");
34051 break;
34052 default:
34053 gcc_unreachable ();
34055 return const0_rtx;
34058 else
34060 if (VECTOR_MODE_P (mode))
34061 op = safe_vector_operand (op, mode);
34063 /* If we aren't optimizing, only allow one memory operand to
34064 be generated. */
34065 if (memory_operand (op, mode))
34066 num_memory++;
34068 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34070 if (optimize || !match || num_memory > 1)
34071 op = copy_to_mode_reg (mode, op);
34073 else
34075 op = copy_to_reg (op);
34076 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34080 args[i].op = op;
34081 args[i].mode = mode;
34084 switch (nargs)
34086 case 1:
34087 pat = GEN_FCN (icode) (real_target, args[0].op);
34088 break;
34089 case 2:
34090 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34091 break;
34092 case 3:
34093 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34094 args[2].op);
34095 break;
34096 case 4:
34097 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34098 args[2].op, args[3].op);
34099 break;
34100 case 5:
34101 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34102 args[2].op, args[3].op, args[4].op);
break;
34103 case 6:
34104 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34105 args[2].op, args[3].op, args[4].op,
34106 args[5].op);
34107 break;
34108 default:
34109 gcc_unreachable ();
34112 if (! pat)
34113 return 0;
34115 emit_insn (pat);
34116 return target;
34119 /* Transform a pattern of the following layout:
34120 (parallel [
34121 set (A B)
34122 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34124 into:
34125 (set (A B))
Or:
34128 (parallel [ A B
...
34130 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
34133 into:
34134 (parallel [ A B ... ]) */
34136 static rtx
34137 ix86_erase_embedded_rounding (rtx pat)
34139 if (GET_CODE (pat) == INSN)
34140 pat = PATTERN (pat);
34142 gcc_assert (GET_CODE (pat) == PARALLEL);
34144 if (XVECLEN (pat, 0) == 2)
34146 rtx p0 = XVECEXP (pat, 0, 0);
34147 rtx p1 = XVECEXP (pat, 0, 1);
34149 gcc_assert (GET_CODE (p0) == SET
34150 && GET_CODE (p1) == UNSPEC
34151 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34153 return p0;
34155 else
34157 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34158 int i = 0;
34159 int j = 0;
34161 for (; i < XVECLEN (pat, 0); ++i)
34163 rtx elem = XVECEXP (pat, 0, i);
34164 if (GET_CODE (elem) != UNSPEC
34165 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34166 res [j++] = elem;
34169 /* No more than 1 occurrence was removed. */
34170 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34172 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34176 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34177 with rounding. */
34178 static rtx
34179 ix86_expand_sse_comi_round (const struct builtin_description *d,
34180 tree exp, rtx target)
34182 rtx pat, set_dst;
34183 tree arg0 = CALL_EXPR_ARG (exp, 0);
34184 tree arg1 = CALL_EXPR_ARG (exp, 1);
34185 tree arg2 = CALL_EXPR_ARG (exp, 2);
34186 tree arg3 = CALL_EXPR_ARG (exp, 3);
34187 rtx op0 = expand_normal (arg0);
34188 rtx op1 = expand_normal (arg1);
34189 rtx op2 = expand_normal (arg2);
34190 rtx op3 = expand_normal (arg3);
34191 enum insn_code icode = d->icode;
34192 const struct insn_data_d *insn_p = &insn_data[icode];
34193 enum machine_mode mode0 = insn_p->operand[0].mode;
34194 enum machine_mode mode1 = insn_p->operand[1].mode;
34195 enum rtx_code comparison = UNEQ;
34196 bool need_ucomi = false;
34198 /* See avxintrin.h for values. */
34199 enum rtx_code comi_comparisons[32] =
34201 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34202 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34203 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34205 bool need_ucomi_values[32] =
34207 true, false, false, true, true, false, false, true,
34208 true, false, false, true, true, false, false, true,
34209 false, true, true, false, false, true, true, false,
34210 false, true, true, false, false, true, true, false
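/* For example, a third argument of 3 (_CMP_UNORD_Q in avxintrin.h)
   selects UNORDERED above and requests the unordered (ucomi) form of
   the comparison. */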
34213 if (!CONST_INT_P (op2))
34215 error ("the third argument must be a comparison constant");
34216 return const0_rtx;
34218 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34220 error ("incorrect comparison mode");
34221 return const0_rtx;
34224 if (!insn_p->operand[2].predicate (op3, SImode))
34226 error ("incorrect rounding operand");
34227 return const0_rtx;
34230 comparison = comi_comparisons[INTVAL (op2)];
34231 need_ucomi = need_ucomi_values[INTVAL (op2)];
34233 if (VECTOR_MODE_P (mode0))
34234 op0 = safe_vector_operand (op0, mode0);
34235 if (VECTOR_MODE_P (mode1))
34236 op1 = safe_vector_operand (op1, mode1);
34238 target = gen_reg_rtx (SImode);
34239 emit_move_insn (target, const0_rtx);
34240 target = gen_rtx_SUBREG (QImode, target, 0);
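/* The result is built in an SImode pseudo cleared to zero; only its
   low QImode part is written from the flags below, so returning
   SUBREG_REG (target) yields the comparison result already
   zero-extended to SImode. */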
34242 if ((optimize && !register_operand (op0, mode0))
34243 || !insn_p->operand[0].predicate (op0, mode0))
34244 op0 = copy_to_mode_reg (mode0, op0);
34245 if ((optimize && !register_operand (op1, mode1))
34246 || !insn_p->operand[1].predicate (op1, mode1))
34247 op1 = copy_to_mode_reg (mode1, op1);
34249 if (need_ucomi)
34250 icode = icode == CODE_FOR_sse_comi_round
34251 ? CODE_FOR_sse_ucomi_round
34252 : CODE_FOR_sse2_ucomi_round;
34254 pat = GEN_FCN (icode) (op0, op1, op3);
34255 if (! pat)
34256 return 0;
34258 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34259 if (INTVAL (op3) == NO_ROUND)
34261 pat = ix86_erase_embedded_rounding (pat);
34262 if (! pat)
34263 return 0;
34265 set_dst = SET_DEST (pat);
34267 else
34269 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34270 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34273 emit_insn (pat);
34274 emit_insn (gen_rtx_SET (VOIDmode,
34275 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34276 gen_rtx_fmt_ee (comparison, QImode,
34277 set_dst,
34278 const0_rtx)));
34280 return SUBREG_REG (target);
34283 static rtx
34284 ix86_expand_round_builtin (const struct builtin_description *d,
34285 tree exp, rtx target)
34287 rtx pat;
34288 unsigned int i, nargs;
34289 struct
34291 rtx op;
34292 enum machine_mode mode;
34293 } args[6];
34294 enum insn_code icode = d->icode;
34295 const struct insn_data_d *insn_p = &insn_data[icode];
34296 enum machine_mode tmode = insn_p->operand[0].mode;
34297 unsigned int nargs_constant = 0;
34298 unsigned int redundant_embed_rnd = 0;
34300 switch ((enum ix86_builtin_func_type) d->flag)
34302 case UINT64_FTYPE_V2DF_INT:
34303 case UINT64_FTYPE_V4SF_INT:
34304 case UINT_FTYPE_V2DF_INT:
34305 case UINT_FTYPE_V4SF_INT:
34306 case INT64_FTYPE_V2DF_INT:
34307 case INT64_FTYPE_V4SF_INT:
34308 case INT_FTYPE_V2DF_INT:
34309 case INT_FTYPE_V4SF_INT:
34310 nargs = 2;
34311 break;
34312 case V4SF_FTYPE_V4SF_UINT_INT:
34313 case V4SF_FTYPE_V4SF_UINT64_INT:
34314 case V2DF_FTYPE_V2DF_UINT64_INT:
34315 case V4SF_FTYPE_V4SF_INT_INT:
34316 case V4SF_FTYPE_V4SF_INT64_INT:
34317 case V2DF_FTYPE_V2DF_INT64_INT:
34318 case V4SF_FTYPE_V4SF_V4SF_INT:
34319 case V2DF_FTYPE_V2DF_V2DF_INT:
34320 case V4SF_FTYPE_V4SF_V2DF_INT:
34321 case V2DF_FTYPE_V2DF_V4SF_INT:
34322 nargs = 3;
34323 break;
34324 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34325 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34326 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34327 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34328 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34329 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34330 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34331 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34332 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34333 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34334 nargs = 4;
34335 break;
34336 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34337 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34338 nargs_constant = 2;
34339 nargs = 4;
34340 break;
34341 case INT_FTYPE_V4SF_V4SF_INT_INT:
34342 case INT_FTYPE_V2DF_V2DF_INT_INT:
34343 return ix86_expand_sse_comi_round (d, exp, target);
34344 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34345 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34346 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34347 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34348 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34349 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34350 nargs = 5;
34351 break;
34352 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34353 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34354 nargs_constant = 4;
34355 nargs = 5;
34356 break;
34357 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34358 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34359 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34360 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34361 nargs_constant = 3;
34362 nargs = 5;
34363 break;
34364 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34365 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34366 nargs = 6;
34367 nargs_constant = 4;
34368 break;
34369 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34370 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34371 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34372 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34373 nargs = 6;
34374 nargs_constant = 3;
34375 break;
34376 default:
34377 gcc_unreachable ();
34379 gcc_assert (nargs <= ARRAY_SIZE (args));
34381 if (optimize
34382 || target == 0
34383 || GET_MODE (target) != tmode
34384 || !insn_p->operand[0].predicate (target, tmode))
34385 target = gen_reg_rtx (tmode);
34387 for (i = 0; i < nargs; i++)
34389 tree arg = CALL_EXPR_ARG (exp, i);
34390 rtx op = expand_normal (arg);
34391 enum machine_mode mode = insn_p->operand[i + 1].mode;
34392 bool match = insn_p->operand[i + 1].predicate (op, mode);
34394 if (i == nargs - nargs_constant)
34396 if (!match)
34398 switch (icode)
34400 case CODE_FOR_avx512f_getmantv8df_mask_round:
34401 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34402 case CODE_FOR_avx512f_getmantv2df_round:
34403 case CODE_FOR_avx512f_getmantv4sf_round:
34404 error ("the immediate argument must be a 4-bit immediate");
34405 return const0_rtx;
34406 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34407 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34408 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34409 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34410 error ("the immediate argument must be a 5-bit immediate");
34411 return const0_rtx;
34412 default:
34413 error ("the immediate argument must be an 8-bit immediate");
34414 return const0_rtx;
34418 else if (i == nargs-1)
34420 if (!insn_p->operand[nargs].predicate (op, SImode))
34422 error ("incorrect rounding operand");
34423 return const0_rtx;
34426 /* If there is no rounding, use the normal version of the pattern. */
34427 if (INTVAL (op) == NO_ROUND)
34428 redundant_embed_rnd = 1;
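/* The embedded-rounding unspec is redundant in that case; it is
   stripped from the generated pattern below by
   ix86_erase_embedded_rounding. */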
34430 else
34432 if (VECTOR_MODE_P (mode))
34433 op = safe_vector_operand (op, mode);
34435 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34437 if (optimize || !match)
34438 op = copy_to_mode_reg (mode, op);
34440 else
34442 op = copy_to_reg (op);
34443 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34447 args[i].op = op;
34448 args[i].mode = mode;
34451 switch (nargs)
34453 case 1:
34454 pat = GEN_FCN (icode) (target, args[0].op);
34455 break;
34456 case 2:
34457 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34458 break;
34459 case 3:
34460 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34461 args[2].op);
34462 break;
34463 case 4:
34464 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34465 args[2].op, args[3].op);
34466 break;
34467 case 5:
34468 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34469 args[2].op, args[3].op, args[4].op);
break;
34470 case 6:
34471 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34472 args[2].op, args[3].op, args[4].op,
34473 args[5].op);
34474 break;
34475 default:
34476 gcc_unreachable ();
34479 if (!pat)
34480 return 0;
34482 if (redundant_embed_rnd)
34483 pat = ix86_erase_embedded_rounding (pat);
34485 emit_insn (pat);
34486 return target;
34489 /* Subroutine of ix86_expand_builtin to take care of special insns
34490 with variable number of operands. */
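/* Builtins routed here take pointer operands or no vector operands at
   all (non-temporal loads and stores, masked loads and stores, lwpval
   and lwpins, and similar), so most of the work below is building a
   MEM with the right alignment for the pointer argument. */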
34492 static rtx
34493 ix86_expand_special_args_builtin (const struct builtin_description *d,
34494 tree exp, rtx target)
34496 tree arg;
34497 rtx pat, op;
34498 unsigned int i, nargs, arg_adjust, memory;
34499 bool aligned_mem = false;
34500 struct
34502 rtx op;
34503 enum machine_mode mode;
34504 } args[3];
34505 enum insn_code icode = d->icode;
34506 bool last_arg_constant = false;
34507 const struct insn_data_d *insn_p = &insn_data[icode];
34508 enum machine_mode tmode = insn_p->operand[0].mode;
34509 enum { load, store } klass;
34511 switch ((enum ix86_builtin_func_type) d->flag)
34513 case VOID_FTYPE_VOID:
34514 emit_insn (GEN_FCN (icode) (target));
34515 return 0;
34516 case VOID_FTYPE_UINT64:
34517 case VOID_FTYPE_UNSIGNED:
34518 nargs = 0;
34519 klass = store;
34520 memory = 0;
34521 break;
34523 case INT_FTYPE_VOID:
34524 case UINT64_FTYPE_VOID:
34525 case UNSIGNED_FTYPE_VOID:
34526 nargs = 0;
34527 klass = load;
34528 memory = 0;
34529 break;
34530 case UINT64_FTYPE_PUNSIGNED:
34531 case V2DI_FTYPE_PV2DI:
34532 case V4DI_FTYPE_PV4DI:
34533 case V32QI_FTYPE_PCCHAR:
34534 case V16QI_FTYPE_PCCHAR:
34535 case V8SF_FTYPE_PCV4SF:
34536 case V8SF_FTYPE_PCFLOAT:
34537 case V4SF_FTYPE_PCFLOAT:
34538 case V4DF_FTYPE_PCV2DF:
34539 case V4DF_FTYPE_PCDOUBLE:
34540 case V2DF_FTYPE_PCDOUBLE:
34541 case VOID_FTYPE_PVOID:
34542 case V16SI_FTYPE_PV4SI:
34543 case V16SF_FTYPE_PV4SF:
34544 case V8DI_FTYPE_PV4DI:
34545 case V8DI_FTYPE_PV8DI:
34546 case V8DF_FTYPE_PV4DF:
34547 nargs = 1;
34548 klass = load;
34549 memory = 0;
34550 switch (icode)
34552 case CODE_FOR_sse4_1_movntdqa:
34553 case CODE_FOR_avx2_movntdqa:
34554 case CODE_FOR_avx512f_movntdqa:
34555 aligned_mem = true;
34556 break;
34557 default:
34558 break;
34560 break;
34561 case VOID_FTYPE_PV2SF_V4SF:
34562 case VOID_FTYPE_PV8DI_V8DI:
34563 case VOID_FTYPE_PV4DI_V4DI:
34564 case VOID_FTYPE_PV2DI_V2DI:
34565 case VOID_FTYPE_PCHAR_V32QI:
34566 case VOID_FTYPE_PCHAR_V16QI:
34567 case VOID_FTYPE_PFLOAT_V16SF:
34568 case VOID_FTYPE_PFLOAT_V8SF:
34569 case VOID_FTYPE_PFLOAT_V4SF:
34570 case VOID_FTYPE_PDOUBLE_V8DF:
34571 case VOID_FTYPE_PDOUBLE_V4DF:
34572 case VOID_FTYPE_PDOUBLE_V2DF:
34573 case VOID_FTYPE_PLONGLONG_LONGLONG:
34574 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34575 case VOID_FTYPE_PINT_INT:
34576 nargs = 1;
34577 klass = store;
34578 /* Reserve memory operand for target. */
34579 memory = ARRAY_SIZE (args);
34580 switch (icode)
34582 /* These builtins and instructions require the memory
34583 to be properly aligned. */
34584 case CODE_FOR_avx_movntv4di:
34585 case CODE_FOR_sse2_movntv2di:
34586 case CODE_FOR_avx_movntv8sf:
34587 case CODE_FOR_sse_movntv4sf:
34588 case CODE_FOR_sse4a_vmmovntv4sf:
34589 case CODE_FOR_avx_movntv4df:
34590 case CODE_FOR_sse2_movntv2df:
34591 case CODE_FOR_sse4a_vmmovntv2df:
34592 case CODE_FOR_sse2_movntidi:
34593 case CODE_FOR_sse_movntq:
34594 case CODE_FOR_sse2_movntisi:
34595 case CODE_FOR_avx512f_movntv16sf:
34596 case CODE_FOR_avx512f_movntv8df:
34597 case CODE_FOR_avx512f_movntv8di:
34598 aligned_mem = true;
34599 break;
34600 default:
34601 break;
34603 break;
34604 case V4SF_FTYPE_V4SF_PCV2SF:
34605 case V2DF_FTYPE_V2DF_PCDOUBLE:
34606 nargs = 2;
34607 klass = load;
34608 memory = 1;
34609 break;
34610 case V8SF_FTYPE_PCV8SF_V8SI:
34611 case V4DF_FTYPE_PCV4DF_V4DI:
34612 case V4SF_FTYPE_PCV4SF_V4SI:
34613 case V2DF_FTYPE_PCV2DF_V2DI:
34614 case V8SI_FTYPE_PCV8SI_V8SI:
34615 case V4DI_FTYPE_PCV4DI_V4DI:
34616 case V4SI_FTYPE_PCV4SI_V4SI:
34617 case V2DI_FTYPE_PCV2DI_V2DI:
34618 nargs = 2;
34619 klass = load;
34620 memory = 0;
34621 break;
34622 case VOID_FTYPE_PV8DF_V8DF_QI:
34623 case VOID_FTYPE_PV16SF_V16SF_HI:
34624 case VOID_FTYPE_PV8DI_V8DI_QI:
34625 case VOID_FTYPE_PV16SI_V16SI_HI:
34626 switch (icode)
34628 /* These builtins and instructions require the memory
34629 to be properly aligned. */
34630 case CODE_FOR_avx512f_storev16sf_mask:
34631 case CODE_FOR_avx512f_storev16si_mask:
34632 case CODE_FOR_avx512f_storev8df_mask:
34633 case CODE_FOR_avx512f_storev8di_mask:
34634 aligned_mem = true;
34635 break;
34636 default:
34637 break;
34639 /* FALLTHRU */
34640 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34641 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34642 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34643 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34644 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34645 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34646 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34647 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34648 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34649 case VOID_FTYPE_PFLOAT_V4SF_QI:
34650 case VOID_FTYPE_PV8SI_V8DI_QI:
34651 case VOID_FTYPE_PV8HI_V8DI_QI:
34652 case VOID_FTYPE_PV16HI_V16SI_HI:
34653 case VOID_FTYPE_PV16QI_V8DI_QI:
34654 case VOID_FTYPE_PV16QI_V16SI_HI:
34655 nargs = 2;
34656 klass = store;
34657 /* Reserve memory operand for target. */
34658 memory = ARRAY_SIZE (args);
34659 break;
34660 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34661 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34662 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34663 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34664 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34665 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34666 nargs = 3;
34667 klass = load;
34668 memory = 0;
34669 switch (icode)
34671 /* These builtins and instructions require the memory
34672 to be properly aligned. */
34673 case CODE_FOR_avx512f_loadv16sf_mask:
34674 case CODE_FOR_avx512f_loadv16si_mask:
34675 case CODE_FOR_avx512f_loadv8df_mask:
34676 case CODE_FOR_avx512f_loadv8di_mask:
34677 aligned_mem = true;
34678 break;
34679 default:
34680 break;
34682 break;
34683 case VOID_FTYPE_UINT_UINT_UINT:
34684 case VOID_FTYPE_UINT64_UINT_UINT:
34685 case UCHAR_FTYPE_UINT_UINT_UINT:
34686 case UCHAR_FTYPE_UINT64_UINT_UINT:
34687 nargs = 3;
34688 klass = load;
34689 memory = ARRAY_SIZE (args);
34690 last_arg_constant = true;
34691 break;
34692 default:
34693 gcc_unreachable ();
34696 gcc_assert (nargs <= ARRAY_SIZE (args));
34698 if (klass == store)
34700 arg = CALL_EXPR_ARG (exp, 0);
34701 op = expand_normal (arg);
34702 gcc_assert (target == 0);
34703 if (memory)
34705 op = ix86_zero_extend_to_Pmode (op);
34706 target = gen_rtx_MEM (tmode, op);
34707 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34708 on it. Try to improve it using get_pointer_alignment,
34709 and if the special builtin is one that requires strict
34710 mode alignment, also from its GET_MODE_ALIGNMENT.
34711 Failure to do so could lead to ix86_legitimate_combined_insn
34712 rejecting all changes to such insns. */
34713 unsigned int align = get_pointer_alignment (arg);
34714 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34715 align = GET_MODE_ALIGNMENT (tmode);
34716 if (MEM_ALIGN (target) < align)
34717 set_mem_align (target, align);
34719 else
34720 target = force_reg (tmode, op);
34721 arg_adjust = 1;
34723 else
34725 arg_adjust = 0;
34726 if (optimize
34727 || target == 0
34728 || !register_operand (target, tmode)
34729 || GET_MODE (target) != tmode)
34730 target = gen_reg_rtx (tmode);
34733 for (i = 0; i < nargs; i++)
34735 enum machine_mode mode = insn_p->operand[i + 1].mode;
34736 bool match;
34738 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34739 op = expand_normal (arg);
34740 match = insn_p->operand[i + 1].predicate (op, mode);
34742 if (last_arg_constant && (i + 1) == nargs)
34744 if (!match)
34746 if (icode == CODE_FOR_lwp_lwpvalsi3
34747 || icode == CODE_FOR_lwp_lwpinssi3
34748 || icode == CODE_FOR_lwp_lwpvaldi3
34749 || icode == CODE_FOR_lwp_lwpinsdi3)
34750 error ("the last argument must be a 32-bit immediate");
34751 else
34752 error ("the last argument must be an 8-bit immediate");
34753 return const0_rtx;
34756 else
34758 if (i == memory)
34760 /* This must be the memory operand. */
34761 op = ix86_zero_extend_to_Pmode (op);
34762 op = gen_rtx_MEM (mode, op);
34763 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34764 on it. Try to improve it using get_pointer_alignment,
34765 and if the special builtin is one that requires strict
34766 mode alignment, also from its GET_MODE_ALIGNMENT.
34767 Failure to do so could lead to ix86_legitimate_combined_insn
34768 rejecting all changes to such insns. */
34769 unsigned int align = get_pointer_alignment (arg);
34770 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34771 align = GET_MODE_ALIGNMENT (mode);
34772 if (MEM_ALIGN (op) < align)
34773 set_mem_align (op, align);
34775 else
34777 /* This must be a register. */
34778 if (VECTOR_MODE_P (mode))
34779 op = safe_vector_operand (op, mode);
34781 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34782 op = copy_to_mode_reg (mode, op);
34783 else
34785 op = copy_to_reg (op);
34786 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34791 args[i].op = op;
34792 args[i].mode = mode;
34795 switch (nargs)
34797 case 0:
34798 pat = GEN_FCN (icode) (target);
34799 break;
34800 case 1:
34801 pat = GEN_FCN (icode) (target, args[0].op);
34802 break;
34803 case 2:
34804 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34805 break;
34806 case 3:
34807 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34808 break;
34809 default:
34810 gcc_unreachable ();
34813 if (! pat)
34814 return 0;
34815 emit_insn (pat);
34816 return klass == store ? 0 : target;
34819 /* Return the integer constant in ARG. Constrain it to be in the range
34820 of the subparts of VEC_TYPE; issue an error if not. */
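/* E.g. for a 4-element vector type the selector must be in the range
   0..3; an out-of-range or non-constant selector is diagnosed and 0
   is used instead. */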
34822 static int
34823 get_element_number (tree vec_type, tree arg)
34825 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34827 if (!tree_fits_uhwi_p (arg)
34828 || (elt = tree_to_uhwi (arg), elt > max))
34830 error ("selector must be an integer constant in the range 0..%wi", max);
34831 return 0;
34834 return elt;
34837 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34838 ix86_expand_vector_init. We DO have language-level syntax for this, in
34839 the form of (type){ init-list }. Except that since we can't place emms
34840 instructions from inside the compiler, we can't allow the use of MMX
34841 registers unless the user explicitly asks for it. So we do *not* define
34842 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34843 we have builtins invoked by mmintrin.h that give us license to emit
34844 these sorts of instructions. */
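/* For instance, mmintrin.h implements _mm_set_pi32 on top of
   __builtin_ia32_vec_init_v2si, which reaches this function as
   IX86_BUILTIN_VEC_INIT_V2SI. */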
34846 static rtx
34847 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34849 enum machine_mode tmode = TYPE_MODE (type);
34850 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34851 int i, n_elt = GET_MODE_NUNITS (tmode);
34852 rtvec v = rtvec_alloc (n_elt);
34854 gcc_assert (VECTOR_MODE_P (tmode));
34855 gcc_assert (call_expr_nargs (exp) == n_elt);
34857 for (i = 0; i < n_elt; ++i)
34859 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34860 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34863 if (!target || !register_operand (target, tmode))
34864 target = gen_reg_rtx (tmode);
34866 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34867 return target;
34870 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34871 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34872 had a language-level syntax for referencing vector elements. */
34874 static rtx
34875 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34877 enum machine_mode tmode, mode0;
34878 tree arg0, arg1;
34879 int elt;
34880 rtx op0;
34882 arg0 = CALL_EXPR_ARG (exp, 0);
34883 arg1 = CALL_EXPR_ARG (exp, 1);
34885 op0 = expand_normal (arg0);
34886 elt = get_element_number (TREE_TYPE (arg0), arg1);
34888 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34889 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34890 gcc_assert (VECTOR_MODE_P (mode0));
34892 op0 = force_reg (mode0, op0);
34894 if (optimize || !target || !register_operand (target, tmode))
34895 target = gen_reg_rtx (tmode);
34897 ix86_expand_vector_extract (true, target, op0, elt);
34899 return target;
34902 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34903 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34904 a language-level syntax for referencing vector elements. */
34906 static rtx
34907 ix86_expand_vec_set_builtin (tree exp)
34909 enum machine_mode tmode, mode1;
34910 tree arg0, arg1, arg2;
34911 int elt;
34912 rtx op0, op1, target;
34914 arg0 = CALL_EXPR_ARG (exp, 0);
34915 arg1 = CALL_EXPR_ARG (exp, 1);
34916 arg2 = CALL_EXPR_ARG (exp, 2);
34918 tmode = TYPE_MODE (TREE_TYPE (arg0));
34919 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34920 gcc_assert (VECTOR_MODE_P (tmode));
34922 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34923 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34924 elt = get_element_number (TREE_TYPE (arg0), arg2);
34926 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34927 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34929 op0 = force_reg (tmode, op0);
34930 op1 = force_reg (mode1, op1);
34932 /* OP0 is the source of these builtin functions and shouldn't be
34933 modified. Create a copy, use it and return it as target. */
34934 target = gen_reg_rtx (tmode);
34935 emit_move_insn (target, op0);
34936 ix86_expand_vector_set (true, target, op1, elt);
34938 return target;
34941 /* Expand an expression EXP that calls a built-in function,
34942 with result going to TARGET if that's convenient
34943 (and in mode MODE if that's convenient).
34944 SUBTARGET may be used as the target for computing one of EXP's operands.
34945 IGNORE is nonzero if the value is to be ignored. */
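/* The cases below hand-expand builtins that need bespoke code
   (MASKMOV*, LDMXCSR/STMXCSR, the RDRAND/RDSEED steps, gathers,
   scatters, and so on); any other builtin is looked up in one of the
   bdesc_* tables near the end of this function and dispatched to the
   matching generic expander. */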
34947 static rtx
34948 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34949 enum machine_mode mode, int ignore)
34951 const struct builtin_description *d;
34952 size_t i;
34953 enum insn_code icode;
34954 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34955 tree arg0, arg1, arg2, arg3, arg4;
34956 rtx op0, op1, op2, op3, op4, pat, insn;
34957 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34958 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34960 /* For CPU builtins that can be folded, fold first and expand the fold. */
34961 switch (fcode)
34963 case IX86_BUILTIN_CPU_INIT:
34965 /* Make it call __cpu_indicator_init in libgcc. */
34966 tree call_expr, fndecl, type;
34967 type = build_function_type_list (integer_type_node, NULL_TREE);
34968 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34969 call_expr = build_call_expr (fndecl, 0);
34970 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34972 case IX86_BUILTIN_CPU_IS:
34973 case IX86_BUILTIN_CPU_SUPPORTS:
34975 tree arg0 = CALL_EXPR_ARG (exp, 0);
34976 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34977 gcc_assert (fold_expr != NULL_TREE);
34978 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34982 /* Determine whether the builtin function is available under the current ISA.
34983 Originally the builtin was not created if it wasn't applicable to the
34984 current ISA based on the command line switches. With function specific
34985 options, we need to check in the context of the function making the call
34986 whether it is supported. */
34987 if (ix86_builtins_isa[fcode].isa
34988 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34990 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34991 NULL, (enum fpmath_unit) 0, false);
34993 if (!opts)
34994 error ("%qE needs unknown isa option", fndecl);
34995 else
34997 gcc_assert (opts != NULL);
34998 error ("%qE needs isa option %s", fndecl, opts);
34999 free (opts);
35001 return const0_rtx;
35004 switch (fcode)
35006 case IX86_BUILTIN_MASKMOVQ:
35007 case IX86_BUILTIN_MASKMOVDQU:
35008 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35009 ? CODE_FOR_mmx_maskmovq
35010 : CODE_FOR_sse2_maskmovdqu);
35011 /* Note the arg order is different from the operand order. */
35012 arg1 = CALL_EXPR_ARG (exp, 0);
35013 arg2 = CALL_EXPR_ARG (exp, 1);
35014 arg0 = CALL_EXPR_ARG (exp, 2);
35015 op0 = expand_normal (arg0);
35016 op1 = expand_normal (arg1);
35017 op2 = expand_normal (arg2);
35018 mode0 = insn_data[icode].operand[0].mode;
35019 mode1 = insn_data[icode].operand[1].mode;
35020 mode2 = insn_data[icode].operand[2].mode;
35022 op0 = ix86_zero_extend_to_Pmode (op0);
35023 op0 = gen_rtx_MEM (mode1, op0);
35025 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35026 op0 = copy_to_mode_reg (mode0, op0);
35027 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35028 op1 = copy_to_mode_reg (mode1, op1);
35029 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35030 op2 = copy_to_mode_reg (mode2, op2);
35031 pat = GEN_FCN (icode) (op0, op1, op2);
35032 if (! pat)
35033 return 0;
35034 emit_insn (pat);
35035 return 0;
35037 case IX86_BUILTIN_LDMXCSR:
35038 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35039 target = assign_386_stack_local (SImode, SLOT_TEMP);
35040 emit_move_insn (target, op0);
35041 emit_insn (gen_sse_ldmxcsr (target));
35042 return 0;
35044 case IX86_BUILTIN_STMXCSR:
35045 target = assign_386_stack_local (SImode, SLOT_TEMP);
35046 emit_insn (gen_sse_stmxcsr (target));
35047 return copy_to_mode_reg (SImode, target);
35049 case IX86_BUILTIN_CLFLUSH:
35050 arg0 = CALL_EXPR_ARG (exp, 0);
35051 op0 = expand_normal (arg0);
35052 icode = CODE_FOR_sse2_clflush;
35053 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35054 op0 = ix86_zero_extend_to_Pmode (op0);
35056 emit_insn (gen_sse2_clflush (op0));
35057 return 0;
35059 case IX86_BUILTIN_MONITOR:
35060 arg0 = CALL_EXPR_ARG (exp, 0);
35061 arg1 = CALL_EXPR_ARG (exp, 1);
35062 arg2 = CALL_EXPR_ARG (exp, 2);
35063 op0 = expand_normal (arg0);
35064 op1 = expand_normal (arg1);
35065 op2 = expand_normal (arg2);
35066 if (!REG_P (op0))
35067 op0 = ix86_zero_extend_to_Pmode (op0);
35068 if (!REG_P (op1))
35069 op1 = copy_to_mode_reg (SImode, op1);
35070 if (!REG_P (op2))
35071 op2 = copy_to_mode_reg (SImode, op2);
35072 emit_insn (ix86_gen_monitor (op0, op1, op2));
35073 return 0;
35075 case IX86_BUILTIN_MWAIT:
35076 arg0 = CALL_EXPR_ARG (exp, 0);
35077 arg1 = CALL_EXPR_ARG (exp, 1);
35078 op0 = expand_normal (arg0);
35079 op1 = expand_normal (arg1);
35080 if (!REG_P (op0))
35081 op0 = copy_to_mode_reg (SImode, op0);
35082 if (!REG_P (op1))
35083 op1 = copy_to_mode_reg (SImode, op1);
35084 emit_insn (gen_sse3_mwait (op0, op1));
35085 return 0;
35087 case IX86_BUILTIN_VEC_INIT_V2SI:
35088 case IX86_BUILTIN_VEC_INIT_V4HI:
35089 case IX86_BUILTIN_VEC_INIT_V8QI:
35090 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35092 case IX86_BUILTIN_VEC_EXT_V2DF:
35093 case IX86_BUILTIN_VEC_EXT_V2DI:
35094 case IX86_BUILTIN_VEC_EXT_V4SF:
35095 case IX86_BUILTIN_VEC_EXT_V4SI:
35096 case IX86_BUILTIN_VEC_EXT_V8HI:
35097 case IX86_BUILTIN_VEC_EXT_V2SI:
35098 case IX86_BUILTIN_VEC_EXT_V4HI:
35099 case IX86_BUILTIN_VEC_EXT_V16QI:
35100 return ix86_expand_vec_ext_builtin (exp, target);
35102 case IX86_BUILTIN_VEC_SET_V2DI:
35103 case IX86_BUILTIN_VEC_SET_V4SF:
35104 case IX86_BUILTIN_VEC_SET_V4SI:
35105 case IX86_BUILTIN_VEC_SET_V8HI:
35106 case IX86_BUILTIN_VEC_SET_V4HI:
35107 case IX86_BUILTIN_VEC_SET_V16QI:
35108 return ix86_expand_vec_set_builtin (exp);
35110 case IX86_BUILTIN_INFQ:
35111 case IX86_BUILTIN_HUGE_VALQ:
35113 REAL_VALUE_TYPE inf;
35114 rtx tmp;
35116 real_inf (&inf);
35117 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35119 tmp = validize_mem (force_const_mem (mode, tmp));
35121 if (target == 0)
35122 target = gen_reg_rtx (mode);
35124 emit_move_insn (target, tmp);
35125 return target;
35128 case IX86_BUILTIN_RDPMC:
35129 case IX86_BUILTIN_RDTSC:
35130 case IX86_BUILTIN_RDTSCP:
35132 op0 = gen_reg_rtx (DImode);
35133 op1 = gen_reg_rtx (DImode);
35135 if (fcode == IX86_BUILTIN_RDPMC)
35137 arg0 = CALL_EXPR_ARG (exp, 0);
35138 op2 = expand_normal (arg0);
35139 if (!register_operand (op2, SImode))
35140 op2 = copy_to_mode_reg (SImode, op2);
35142 insn = (TARGET_64BIT
35143 ? gen_rdpmc_rex64 (op0, op1, op2)
35144 : gen_rdpmc (op0, op2));
35145 emit_insn (insn);
35147 else if (fcode == IX86_BUILTIN_RDTSC)
35149 insn = (TARGET_64BIT
35150 ? gen_rdtsc_rex64 (op0, op1)
35151 : gen_rdtsc (op0));
35152 emit_insn (insn);
35154 else
35156 op2 = gen_reg_rtx (SImode);
35158 insn = (TARGET_64BIT
35159 ? gen_rdtscp_rex64 (op0, op1, op2)
35160 : gen_rdtscp (op0, op2));
35161 emit_insn (insn);
35163 arg0 = CALL_EXPR_ARG (exp, 0);
35164 op4 = expand_normal (arg0);
35165 if (!address_operand (op4, VOIDmode))
35167 op4 = convert_memory_address (Pmode, op4);
35168 op4 = copy_addr_to_reg (op4);
35170 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35173 if (target == 0)
35175 /* mode is VOIDmode if __builtin_rd* has been called
35176 without lhs. */
35177 if (mode == VOIDmode)
35178 return target;
35179 target = gen_reg_rtx (mode);
35182 if (TARGET_64BIT)
35184 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35185 op1, 1, OPTAB_DIRECT);
35186 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35187 op0, 1, OPTAB_DIRECT);
35190 emit_move_insn (target, op0);
35191 return target;
35193 case IX86_BUILTIN_FXSAVE:
35194 case IX86_BUILTIN_FXRSTOR:
35195 case IX86_BUILTIN_FXSAVE64:
35196 case IX86_BUILTIN_FXRSTOR64:
35197 case IX86_BUILTIN_FNSTENV:
35198 case IX86_BUILTIN_FLDENV:
35199 case IX86_BUILTIN_FNSTSW:
35200 mode0 = BLKmode;
35201 switch (fcode)
35203 case IX86_BUILTIN_FXSAVE:
35204 icode = CODE_FOR_fxsave;
35205 break;
35206 case IX86_BUILTIN_FXRSTOR:
35207 icode = CODE_FOR_fxrstor;
35208 break;
35209 case IX86_BUILTIN_FXSAVE64:
35210 icode = CODE_FOR_fxsave64;
35211 break;
35212 case IX86_BUILTIN_FXRSTOR64:
35213 icode = CODE_FOR_fxrstor64;
35214 break;
35215 case IX86_BUILTIN_FNSTENV:
35216 icode = CODE_FOR_fnstenv;
35217 break;
35218 case IX86_BUILTIN_FLDENV:
35219 icode = CODE_FOR_fldenv;
35220 break;
35221 case IX86_BUILTIN_FNSTSW:
35222 icode = CODE_FOR_fnstsw;
35223 mode0 = HImode;
35224 break;
35225 default:
35226 gcc_unreachable ();
35229 arg0 = CALL_EXPR_ARG (exp, 0);
35230 op0 = expand_normal (arg0);
35232 if (!address_operand (op0, VOIDmode))
35234 op0 = convert_memory_address (Pmode, op0);
35235 op0 = copy_addr_to_reg (op0);
35237 op0 = gen_rtx_MEM (mode0, op0);
35239 pat = GEN_FCN (icode) (op0);
35240 if (pat)
35241 emit_insn (pat);
35242 return 0;
35244 case IX86_BUILTIN_XSAVE:
35245 case IX86_BUILTIN_XRSTOR:
35246 case IX86_BUILTIN_XSAVE64:
35247 case IX86_BUILTIN_XRSTOR64:
35248 case IX86_BUILTIN_XSAVEOPT:
35249 case IX86_BUILTIN_XSAVEOPT64:
35250 arg0 = CALL_EXPR_ARG (exp, 0);
35251 arg1 = CALL_EXPR_ARG (exp, 1);
35252 op0 = expand_normal (arg0);
35253 op1 = expand_normal (arg1);
35255 if (!address_operand (op0, VOIDmode))
35257 op0 = convert_memory_address (Pmode, op0);
35258 op0 = copy_addr_to_reg (op0);
35260 op0 = gen_rtx_MEM (BLKmode, op0);
35262 op1 = force_reg (DImode, op1);
35264 if (TARGET_64BIT)
35266 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35267 NULL, 1, OPTAB_DIRECT);
35268 switch (fcode)
35270 case IX86_BUILTIN_XSAVE:
35271 icode = CODE_FOR_xsave_rex64;
35272 break;
35273 case IX86_BUILTIN_XRSTOR:
35274 icode = CODE_FOR_xrstor_rex64;
35275 break;
35276 case IX86_BUILTIN_XSAVE64:
35277 icode = CODE_FOR_xsave64;
35278 break;
35279 case IX86_BUILTIN_XRSTOR64:
35280 icode = CODE_FOR_xrstor64;
35281 break;
35282 case IX86_BUILTIN_XSAVEOPT:
35283 icode = CODE_FOR_xsaveopt_rex64;
35284 break;
35285 case IX86_BUILTIN_XSAVEOPT64:
35286 icode = CODE_FOR_xsaveopt64;
35287 break;
35288 default:
35289 gcc_unreachable ();
35292 op2 = gen_lowpart (SImode, op2);
35293 op1 = gen_lowpart (SImode, op1);
35294 pat = GEN_FCN (icode) (op0, op1, op2);
35296 else
35298 switch (fcode)
35300 case IX86_BUILTIN_XSAVE:
35301 icode = CODE_FOR_xsave;
35302 break;
35303 case IX86_BUILTIN_XRSTOR:
35304 icode = CODE_FOR_xrstor;
35305 break;
35306 case IX86_BUILTIN_XSAVEOPT:
35307 icode = CODE_FOR_xsaveopt;
35308 break;
35309 default:
35310 gcc_unreachable ();
35312 pat = GEN_FCN (icode) (op0, op1);
35315 if (pat)
35316 emit_insn (pat);
35317 return 0;
35319 case IX86_BUILTIN_LLWPCB:
35320 arg0 = CALL_EXPR_ARG (exp, 0);
35321 op0 = expand_normal (arg0);
35322 icode = CODE_FOR_lwp_llwpcb;
35323 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35324 op0 = ix86_zero_extend_to_Pmode (op0);
35325 emit_insn (gen_lwp_llwpcb (op0));
35326 return 0;
35328 case IX86_BUILTIN_SLWPCB:
35329 icode = CODE_FOR_lwp_slwpcb;
35330 if (!target
35331 || !insn_data[icode].operand[0].predicate (target, Pmode))
35332 target = gen_reg_rtx (Pmode);
35333 emit_insn (gen_lwp_slwpcb (target));
35334 return target;
35336 case IX86_BUILTIN_BEXTRI32:
35337 case IX86_BUILTIN_BEXTRI64:
35338 arg0 = CALL_EXPR_ARG (exp, 0);
35339 arg1 = CALL_EXPR_ARG (exp, 1);
35340 op0 = expand_normal (arg0);
35341 op1 = expand_normal (arg1);
35342 icode = (fcode == IX86_BUILTIN_BEXTRI32
35343 ? CODE_FOR_tbm_bextri_si
35344 : CODE_FOR_tbm_bextri_di);
35345 if (!CONST_INT_P (op1))
35347 error ("last argument must be an immediate");
35348 return const0_rtx;
35350 else
35352 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35353 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35354 op1 = GEN_INT (length);
35355 op2 = GEN_INT (lsb_index);
35356 pat = GEN_FCN (icode) (target, op0, op1, op2);
35357 if (pat)
35358 emit_insn (pat);
35359 return target;
35362 case IX86_BUILTIN_RDRAND16_STEP:
35363 icode = CODE_FOR_rdrandhi_1;
35364 mode0 = HImode;
35365 goto rdrand_step;
35367 case IX86_BUILTIN_RDRAND32_STEP:
35368 icode = CODE_FOR_rdrandsi_1;
35369 mode0 = SImode;
35370 goto rdrand_step;
35372 case IX86_BUILTIN_RDRAND64_STEP:
35373 icode = CODE_FOR_rdranddi_1;
35374 mode0 = DImode;
35376 rdrand_step:
35377 op0 = gen_reg_rtx (mode0);
35378 emit_insn (GEN_FCN (icode) (op0));
35380 arg0 = CALL_EXPR_ARG (exp, 0);
35381 op1 = expand_normal (arg0);
35382 if (!address_operand (op1, VOIDmode))
35384 op1 = convert_memory_address (Pmode, op1);
35385 op1 = copy_addr_to_reg (op1);
35387 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35389 op1 = gen_reg_rtx (SImode);
35390 emit_move_insn (op1, CONST1_RTX (SImode));
35392 /* Emit SImode conditional move. */
35393 if (mode0 == HImode)
35395 op2 = gen_reg_rtx (SImode);
35396 emit_insn (gen_zero_extendhisi2 (op2, op0));
35398 else if (mode0 == SImode)
35399 op2 = op0;
35400 else
35401 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35403 if (target == 0)
35404 target = gen_reg_rtx (SImode);
35406 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35407 const0_rtx);
35408 emit_insn (gen_rtx_SET (VOIDmode, target,
35409 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35410 return target;
35412 case IX86_BUILTIN_RDSEED16_STEP:
35413 icode = CODE_FOR_rdseedhi_1;
35414 mode0 = HImode;
35415 goto rdseed_step;
35417 case IX86_BUILTIN_RDSEED32_STEP:
35418 icode = CODE_FOR_rdseedsi_1;
35419 mode0 = SImode;
35420 goto rdseed_step;
35422 case IX86_BUILTIN_RDSEED64_STEP:
35423 icode = CODE_FOR_rdseeddi_1;
35424 mode0 = DImode;
35426 rdseed_step:
35427 op0 = gen_reg_rtx (mode0);
35428 emit_insn (GEN_FCN (icode) (op0));
35430 arg0 = CALL_EXPR_ARG (exp, 0);
35431 op1 = expand_normal (arg0);
35432 if (!address_operand (op1, VOIDmode))
35434 op1 = convert_memory_address (Pmode, op1);
35435 op1 = copy_addr_to_reg (op1);
35437 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35439 op2 = gen_reg_rtx (QImode);
35441 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35442 const0_rtx);
35443 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35445 if (target == 0)
35446 target = gen_reg_rtx (SImode);
35448 emit_insn (gen_zero_extendqisi2 (target, op2));
35449 return target;
35451 case IX86_BUILTIN_ADDCARRYX32:
35452 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35453 mode0 = SImode;
35454 goto addcarryx;
35456 case IX86_BUILTIN_ADDCARRYX64:
35457 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35458 mode0 = DImode;
35460 addcarryx:
35461 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35462 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35463 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35464 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35466 op0 = gen_reg_rtx (QImode);
35468 /* Generate CF from input operand. */
35469 op1 = expand_normal (arg0);
35470 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35471 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
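/* Adding -1 to the zero-extended carry-in byte sets CF exactly when
   the carry-in is nonzero, so the flags register now holds C_IN for
   the adcx/adc pattern generated below. */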
35473 /* Gen ADCX instruction to compute X+Y+CF. */
35474 op2 = expand_normal (arg1);
35475 op3 = expand_normal (arg2);
35477 if (!REG_P (op2))
35478 op2 = copy_to_mode_reg (mode0, op2);
35479 if (!REG_P (op3))
35480 op3 = copy_to_mode_reg (mode0, op3);
35482 op0 = gen_reg_rtx (mode0);
35484 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35485 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35486 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35488 /* Store the result. */
35489 op4 = expand_normal (arg3);
35490 if (!address_operand (op4, VOIDmode))
35492 op4 = convert_memory_address (Pmode, op4);
35493 op4 = copy_addr_to_reg (op4);
35495 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35497 /* Return current CF value. */
35498 if (target == 0)
35499 target = gen_reg_rtx (QImode);
35501 PUT_MODE (pat, QImode);
35502 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35503 return target;
35505 case IX86_BUILTIN_READ_FLAGS:
35506 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35508 if (optimize
35509 || target == NULL_RTX
35510 || !nonimmediate_operand (target, word_mode)
35511 || GET_MODE (target) != word_mode)
35512 target = gen_reg_rtx (word_mode);
35514 emit_insn (gen_pop (target));
35515 return target;
35517 case IX86_BUILTIN_WRITE_FLAGS:
35519 arg0 = CALL_EXPR_ARG (exp, 0);
35520 op0 = expand_normal (arg0);
35521 if (!general_no_elim_operand (op0, word_mode))
35522 op0 = copy_to_mode_reg (word_mode, op0);
35524 emit_insn (gen_push (op0));
35525 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35526 return 0;
35528 case IX86_BUILTIN_KORTESTC16:
35529 icode = CODE_FOR_kortestchi;
35530 mode0 = HImode;
35531 mode1 = CCCmode;
35532 goto kortest;
35534 case IX86_BUILTIN_KORTESTZ16:
35535 icode = CODE_FOR_kortestzhi;
35536 mode0 = HImode;
35537 mode1 = CCZmode;
35539 kortest:
35540 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35541 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35542 op0 = expand_normal (arg0);
35543 op1 = expand_normal (arg1);
35545 op0 = copy_to_reg (op0);
35546 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35547 op1 = copy_to_reg (op1);
35548 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35550 target = gen_reg_rtx (QImode);
35551 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35553 /* Emit kortest. */
35554 emit_insn (GEN_FCN (icode) (op0, op1));
35555 /* And use setcc to return result from flags. */
35556 ix86_expand_setcc (target, EQ,
35557 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35558 return target;
35560 case IX86_BUILTIN_GATHERSIV2DF:
35561 icode = CODE_FOR_avx2_gathersiv2df;
35562 goto gather_gen;
35563 case IX86_BUILTIN_GATHERSIV4DF:
35564 icode = CODE_FOR_avx2_gathersiv4df;
35565 goto gather_gen;
35566 case IX86_BUILTIN_GATHERDIV2DF:
35567 icode = CODE_FOR_avx2_gatherdiv2df;
35568 goto gather_gen;
35569 case IX86_BUILTIN_GATHERDIV4DF:
35570 icode = CODE_FOR_avx2_gatherdiv4df;
35571 goto gather_gen;
35572 case IX86_BUILTIN_GATHERSIV4SF:
35573 icode = CODE_FOR_avx2_gathersiv4sf;
35574 goto gather_gen;
35575 case IX86_BUILTIN_GATHERSIV8SF:
35576 icode = CODE_FOR_avx2_gathersiv8sf;
35577 goto gather_gen;
35578 case IX86_BUILTIN_GATHERDIV4SF:
35579 icode = CODE_FOR_avx2_gatherdiv4sf;
35580 goto gather_gen;
35581 case IX86_BUILTIN_GATHERDIV8SF:
35582 icode = CODE_FOR_avx2_gatherdiv8sf;
35583 goto gather_gen;
35584 case IX86_BUILTIN_GATHERSIV2DI:
35585 icode = CODE_FOR_avx2_gathersiv2di;
35586 goto gather_gen;
35587 case IX86_BUILTIN_GATHERSIV4DI:
35588 icode = CODE_FOR_avx2_gathersiv4di;
35589 goto gather_gen;
35590 case IX86_BUILTIN_GATHERDIV2DI:
35591 icode = CODE_FOR_avx2_gatherdiv2di;
35592 goto gather_gen;
35593 case IX86_BUILTIN_GATHERDIV4DI:
35594 icode = CODE_FOR_avx2_gatherdiv4di;
35595 goto gather_gen;
35596 case IX86_BUILTIN_GATHERSIV4SI:
35597 icode = CODE_FOR_avx2_gathersiv4si;
35598 goto gather_gen;
35599 case IX86_BUILTIN_GATHERSIV8SI:
35600 icode = CODE_FOR_avx2_gathersiv8si;
35601 goto gather_gen;
35602 case IX86_BUILTIN_GATHERDIV4SI:
35603 icode = CODE_FOR_avx2_gatherdiv4si;
35604 goto gather_gen;
35605 case IX86_BUILTIN_GATHERDIV8SI:
35606 icode = CODE_FOR_avx2_gatherdiv8si;
35607 goto gather_gen;
35608 case IX86_BUILTIN_GATHERALTSIV4DF:
35609 icode = CODE_FOR_avx2_gathersiv4df;
35610 goto gather_gen;
35611 case IX86_BUILTIN_GATHERALTDIV8SF:
35612 icode = CODE_FOR_avx2_gatherdiv8sf;
35613 goto gather_gen;
35614 case IX86_BUILTIN_GATHERALTSIV4DI:
35615 icode = CODE_FOR_avx2_gathersiv4di;
35616 goto gather_gen;
35617 case IX86_BUILTIN_GATHERALTDIV8SI:
35618 icode = CODE_FOR_avx2_gatherdiv8si;
35619 goto gather_gen;
35620 case IX86_BUILTIN_GATHER3SIV16SF:
35621 icode = CODE_FOR_avx512f_gathersiv16sf;
35622 goto gather_gen;
35623 case IX86_BUILTIN_GATHER3SIV8DF:
35624 icode = CODE_FOR_avx512f_gathersiv8df;
35625 goto gather_gen;
35626 case IX86_BUILTIN_GATHER3DIV16SF:
35627 icode = CODE_FOR_avx512f_gatherdiv16sf;
35628 goto gather_gen;
35629 case IX86_BUILTIN_GATHER3DIV8DF:
35630 icode = CODE_FOR_avx512f_gatherdiv8df;
35631 goto gather_gen;
35632 case IX86_BUILTIN_GATHER3SIV16SI:
35633 icode = CODE_FOR_avx512f_gathersiv16si;
35634 goto gather_gen;
35635 case IX86_BUILTIN_GATHER3SIV8DI:
35636 icode = CODE_FOR_avx512f_gathersiv8di;
35637 goto gather_gen;
35638 case IX86_BUILTIN_GATHER3DIV16SI:
35639 icode = CODE_FOR_avx512f_gatherdiv16si;
35640 goto gather_gen;
35641 case IX86_BUILTIN_GATHER3DIV8DI:
35642 icode = CODE_FOR_avx512f_gatherdiv8di;
35643 goto gather_gen;
35644 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35645 icode = CODE_FOR_avx512f_gathersiv8df;
35646 goto gather_gen;
35647 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35648 icode = CODE_FOR_avx512f_gatherdiv16sf;
35649 goto gather_gen;
35650 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35651 icode = CODE_FOR_avx512f_gathersiv8di;
35652 goto gather_gen;
35653 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35654 icode = CODE_FOR_avx512f_gatherdiv16si;
35655 goto gather_gen;
35656 case IX86_BUILTIN_SCATTERSIV16SF:
35657 icode = CODE_FOR_avx512f_scattersiv16sf;
35658 goto scatter_gen;
35659 case IX86_BUILTIN_SCATTERSIV8DF:
35660 icode = CODE_FOR_avx512f_scattersiv8df;
35661 goto scatter_gen;
35662 case IX86_BUILTIN_SCATTERDIV16SF:
35663 icode = CODE_FOR_avx512f_scatterdiv16sf;
35664 goto scatter_gen;
35665 case IX86_BUILTIN_SCATTERDIV8DF:
35666 icode = CODE_FOR_avx512f_scatterdiv8df;
35667 goto scatter_gen;
35668 case IX86_BUILTIN_SCATTERSIV16SI:
35669 icode = CODE_FOR_avx512f_scattersiv16si;
35670 goto scatter_gen;
35671 case IX86_BUILTIN_SCATTERSIV8DI:
35672 icode = CODE_FOR_avx512f_scattersiv8di;
35673 goto scatter_gen;
35674 case IX86_BUILTIN_SCATTERDIV16SI:
35675 icode = CODE_FOR_avx512f_scatterdiv16si;
35676 goto scatter_gen;
35677 case IX86_BUILTIN_SCATTERDIV8DI:
35678 icode = CODE_FOR_avx512f_scatterdiv8di;
35679 goto scatter_gen;
35681 case IX86_BUILTIN_GATHERPFDPD:
35682 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35683 goto vec_prefetch_gen;
35684 case IX86_BUILTIN_GATHERPFDPS:
35685 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35686 goto vec_prefetch_gen;
35687 case IX86_BUILTIN_GATHERPFQPD:
35688 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35689 goto vec_prefetch_gen;
35690 case IX86_BUILTIN_GATHERPFQPS:
35691 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35692 goto vec_prefetch_gen;
35693 case IX86_BUILTIN_SCATTERPFDPD:
35694 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35695 goto vec_prefetch_gen;
35696 case IX86_BUILTIN_SCATTERPFDPS:
35697 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35698 goto vec_prefetch_gen;
35699 case IX86_BUILTIN_SCATTERPFQPD:
35700 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35701 goto vec_prefetch_gen;
35702 case IX86_BUILTIN_SCATTERPFQPS:
35703 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35704 goto vec_prefetch_gen;
35706 gather_gen:
35707 rtx half;
35708 rtx (*gen) (rtx, rtx);
35710 arg0 = CALL_EXPR_ARG (exp, 0);
35711 arg1 = CALL_EXPR_ARG (exp, 1);
35712 arg2 = CALL_EXPR_ARG (exp, 2);
35713 arg3 = CALL_EXPR_ARG (exp, 3);
35714 arg4 = CALL_EXPR_ARG (exp, 4);
35715 op0 = expand_normal (arg0);
35716 op1 = expand_normal (arg1);
35717 op2 = expand_normal (arg2);
35718 op3 = expand_normal (arg3);
35719 op4 = expand_normal (arg4);
35720 /* Note the arg order is different from the operand order. */
35721 mode0 = insn_data[icode].operand[1].mode;
35722 mode2 = insn_data[icode].operand[3].mode;
35723 mode3 = insn_data[icode].operand[4].mode;
35724 mode4 = insn_data[icode].operand[5].mode;
35726 if (target == NULL_RTX
35727 || GET_MODE (target) != insn_data[icode].operand[0].mode
35728 || !insn_data[icode].operand[0].predicate (target,
35729 GET_MODE (target)))
35730 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35731 else
35732 subtarget = target;
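/* The *ALT* gather builtins mix full-width and half-width vector
   operands; only the low half of the over-wide operand (the index for
   the SIV forms, the source and mask for the DIV forms) is actually
   used, so extract it here before the operands are matched against
   the insn predicates. */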
35734 switch (fcode)
35736 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35737 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35738 half = gen_reg_rtx (V8SImode);
35739 if (!nonimmediate_operand (op2, V16SImode))
35740 op2 = copy_to_mode_reg (V16SImode, op2);
35741 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35742 op2 = half;
35743 break;
35744 case IX86_BUILTIN_GATHERALTSIV4DF:
35745 case IX86_BUILTIN_GATHERALTSIV4DI:
35746 half = gen_reg_rtx (V4SImode);
35747 if (!nonimmediate_operand (op2, V8SImode))
35748 op2 = copy_to_mode_reg (V8SImode, op2);
35749 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35750 op2 = half;
35751 break;
35752 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35753 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35754 half = gen_reg_rtx (mode0);
35755 if (mode0 == V8SFmode)
35756 gen = gen_vec_extract_lo_v16sf;
35757 else
35758 gen = gen_vec_extract_lo_v16si;
35759 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35760 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35761 emit_insn (gen (half, op0));
35762 op0 = half;
35763 if (GET_MODE (op3) != VOIDmode)
35765 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35766 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35767 emit_insn (gen (half, op3));
35768 op3 = half;
35770 break;
35771 case IX86_BUILTIN_GATHERALTDIV8SF:
35772 case IX86_BUILTIN_GATHERALTDIV8SI:
35773 half = gen_reg_rtx (mode0);
35774 if (mode0 == V4SFmode)
35775 gen = gen_vec_extract_lo_v8sf;
35776 else
35777 gen = gen_vec_extract_lo_v8si;
35778 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35779 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35780 emit_insn (gen (half, op0));
35781 op0 = half;
35782 if (GET_MODE (op3) != VOIDmode)
35784 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35785 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35786 emit_insn (gen (half, op3));
35787 op3 = half;
35789 break;
35790 default:
35791 break;
35794 /* Force the memory operand to be addressed with just a base
35795 register here; we don't want to do that to the memory operands
35796 of other builtin functions. */
35797 op1 = ix86_zero_extend_to_Pmode (op1);
35799 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35800 op0 = copy_to_mode_reg (mode0, op0);
35801 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35802 op1 = copy_to_mode_reg (Pmode, op1);
35803 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35804 op2 = copy_to_mode_reg (mode2, op2);
35805 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35807 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35808 op3 = copy_to_mode_reg (mode3, op3);
35810 else
35812 op3 = copy_to_reg (op3);
35813 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35815 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35817 error ("the last argument must be a scale of 1, 2, 4 or 8");
35818 return const0_rtx;
35821 /* Optimize. If mask is known to have all high bits set,
35822 replace op0 with pc_rtx to signal that the instruction
35823 overwrites the whole destination and doesn't use its
35824 previous contents. */
35825 if (optimize)
35827 if (TREE_CODE (arg3) == INTEGER_CST)
35829 if (integer_all_onesp (arg3))
35830 op0 = pc_rtx;
35832 else if (TREE_CODE (arg3) == VECTOR_CST)
35834 unsigned int negative = 0;
35835 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35837 tree cst = VECTOR_CST_ELT (arg3, i);
35838 if (TREE_CODE (cst) == INTEGER_CST
35839 && tree_int_cst_sign_bit (cst))
35840 negative++;
35841 else if (TREE_CODE (cst) == REAL_CST
35842 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35843 negative++;
35845 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35846 op0 = pc_rtx;
35848 else if (TREE_CODE (arg3) == SSA_NAME
35849 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35851 /* Recognize also when mask is like:
35852 __v2df src = _mm_setzero_pd ();
35853 __v2df mask = _mm_cmpeq_pd (src, src);
35855 __v8sf src = _mm256_setzero_ps ();
35856 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35857 as that is a cheaper way to load all ones into
35858 a register than having to load a constant from
35859 memory. */
35860 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35861 if (is_gimple_call (def_stmt))
35863 tree fndecl = gimple_call_fndecl (def_stmt);
35864 if (fndecl
35865 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35866 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35868 case IX86_BUILTIN_CMPPD:
35869 case IX86_BUILTIN_CMPPS:
35870 case IX86_BUILTIN_CMPPD256:
35871 case IX86_BUILTIN_CMPPS256:
35872 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35873 break;
35874 /* FALLTHRU */
35875 case IX86_BUILTIN_CMPEQPD:
35876 case IX86_BUILTIN_CMPEQPS:
35877 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35878 && initializer_zerop (gimple_call_arg (def_stmt,
35879 1)))
35880 op0 = pc_rtx;
35881 break;
35882 default:
35883 break;
35889 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35890 if (! pat)
35891 return const0_rtx;
35892 emit_insn (pat);
35894 switch (fcode)
35896 case IX86_BUILTIN_GATHER3DIV16SF:
35897 if (target == NULL_RTX)
35898 target = gen_reg_rtx (V8SFmode);
35899 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35900 break;
35901 case IX86_BUILTIN_GATHER3DIV16SI:
35902 if (target == NULL_RTX)
35903 target = gen_reg_rtx (V8SImode);
35904 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35905 break;
35906 case IX86_BUILTIN_GATHERDIV8SF:
35907 if (target == NULL_RTX)
35908 target = gen_reg_rtx (V4SFmode);
35909 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35910 break;
35911 case IX86_BUILTIN_GATHERDIV8SI:
35912 if (target == NULL_RTX)
35913 target = gen_reg_rtx (V4SImode);
35914 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35915 break;
35916 default:
35917 target = subtarget;
35918 break;
35920 return target;
35922 scatter_gen:
35923 arg0 = CALL_EXPR_ARG (exp, 0);
35924 arg1 = CALL_EXPR_ARG (exp, 1);
35925 arg2 = CALL_EXPR_ARG (exp, 2);
35926 arg3 = CALL_EXPR_ARG (exp, 3);
35927 arg4 = CALL_EXPR_ARG (exp, 4);
35928 op0 = expand_normal (arg0);
35929 op1 = expand_normal (arg1);
35930 op2 = expand_normal (arg2);
35931 op3 = expand_normal (arg3);
35932 op4 = expand_normal (arg4);
35933 mode1 = insn_data[icode].operand[1].mode;
35934 mode2 = insn_data[icode].operand[2].mode;
35935 mode3 = insn_data[icode].operand[3].mode;
35936 mode4 = insn_data[icode].operand[4].mode;
35938 /* Force the memory operand to be addressed with just a base
35939 register here; we don't want to do that to the memory operands
35940 of other builtin functions. */
35941 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35943 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35944 op0 = copy_to_mode_reg (Pmode, op0);
35946 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35948 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35949 op1 = copy_to_mode_reg (mode1, op1);
35951 else
35953 op1 = copy_to_reg (op1);
35954 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35957 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35958 op2 = copy_to_mode_reg (mode2, op2);
35960 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35961 op3 = copy_to_mode_reg (mode3, op3);
35963 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35965 error ("the last argument must be a scale of 1, 2, 4 or 8");
35966 return const0_rtx;
35969 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35970 if (! pat)
35971 return const0_rtx;
35973 emit_insn (pat);
35974 return 0;
35976 vec_prefetch_gen:
35977 arg0 = CALL_EXPR_ARG (exp, 0);
35978 arg1 = CALL_EXPR_ARG (exp, 1);
35979 arg2 = CALL_EXPR_ARG (exp, 2);
35980 arg3 = CALL_EXPR_ARG (exp, 3);
35981 arg4 = CALL_EXPR_ARG (exp, 4);
35982 op0 = expand_normal (arg0);
35983 op1 = expand_normal (arg1);
35984 op2 = expand_normal (arg2);
35985 op3 = expand_normal (arg3);
35986 op4 = expand_normal (arg4);
35987 mode0 = insn_data[icode].operand[0].mode;
35988 mode1 = insn_data[icode].operand[1].mode;
35989 mode3 = insn_data[icode].operand[3].mode;
35990 mode4 = insn_data[icode].operand[4].mode;
35992 if (GET_MODE (op0) == mode0
35993 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35995 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35996 op0 = copy_to_mode_reg (mode0, op0);
35998 else if (op0 != constm1_rtx)
36000 op0 = copy_to_reg (op0);
36001 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36004 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36005 op1 = copy_to_mode_reg (mode1, op1);
36007 /* Force memory operand only with base register here. But we
36008 don't want to do it on memory operand for other builtin
36009 functions. */
36010 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36012 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36013 op2 = copy_to_mode_reg (Pmode, op2);
36015 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36017 error ("the forth argument must be scale 1, 2, 4, 8");
36018 return const0_rtx;
36021 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36023 error ("incorrect hint operand");
36024 return const0_rtx;
36027 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36028 if (! pat)
36029 return const0_rtx;
36031 emit_insn (pat);
36033 return 0;
36035 case IX86_BUILTIN_XABORT:
36036 icode = CODE_FOR_xabort;
36037 arg0 = CALL_EXPR_ARG (exp, 0);
36038 op0 = expand_normal (arg0);
36039 mode0 = insn_data[icode].operand[0].mode;
36040 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36042 error ("the xabort's argument must be an 8-bit immediate");
36043 return const0_rtx;
36045 emit_insn (gen_xabort (op0));
36046 return 0;
36048 default:
36049 break;
36052 for (i = 0, d = bdesc_special_args;
36053 i < ARRAY_SIZE (bdesc_special_args);
36054 i++, d++)
36055 if (d->code == fcode)
36056 return ix86_expand_special_args_builtin (d, exp, target);
36058 for (i = 0, d = bdesc_args;
36059 i < ARRAY_SIZE (bdesc_args);
36060 i++, d++)
36061 if (d->code == fcode)
36062 switch (fcode)
36064 case IX86_BUILTIN_FABSQ:
36065 case IX86_BUILTIN_COPYSIGNQ:
36066 if (!TARGET_SSE)
36067 /* Emit a normal call if SSE isn't available. */
36068 return expand_call (exp, target, ignore);
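/* FALLTHRU */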
36069 default:
36070 return ix86_expand_args_builtin (d, exp, target);
36073 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36074 if (d->code == fcode)
36075 return ix86_expand_sse_comi (d, exp, target);
36077 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36078 if (d->code == fcode)
36079 return ix86_expand_round_builtin (d, exp, target);
36081 for (i = 0, d = bdesc_pcmpestr;
36082 i < ARRAY_SIZE (bdesc_pcmpestr);
36083 i++, d++)
36084 if (d->code == fcode)
36085 return ix86_expand_sse_pcmpestr (d, exp, target);
36087 for (i = 0, d = bdesc_pcmpistr;
36088 i < ARRAY_SIZE (bdesc_pcmpistr);
36089 i++, d++)
36090 if (d->code == fcode)
36091 return ix86_expand_sse_pcmpistr (d, exp, target);
36093 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36094 if (d->code == fcode)
36095 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36096 (enum ix86_builtin_func_type)
36097 d->flag, d->comparison);
36099 gcc_unreachable ();
36102 /* This returns the target-specific builtin with code CODE if
36103 current_function_decl has visibility on this builtin, which is checked
36104 using isa flags. Returns NULL_TREE otherwise. */
36106 static tree ix86_get_builtin (enum ix86_builtins code)
36108 struct cl_target_option *opts;
36109 tree target_tree = NULL_TREE;
36111 /* Determine the isa flags of current_function_decl. */
36113 if (current_function_decl)
36114 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36116 if (target_tree == NULL)
36117 target_tree = target_option_default_node;
36119 opts = TREE_TARGET_OPTION (target_tree);
36121 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36122 return ix86_builtin_decl (code, true);
36123 else
36124 return NULL_TREE;
36127 /* Returns a function decl for a vectorized version of the builtin function
36128 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36129 if it is not available. */
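/* The variant is chosen purely from the element mode and the number of
   elements of TYPE_OUT/TYPE_IN: for example, 2 or 4 DFmode elements select
   the 128-bit or 256-bit form and 8 DFmode or 16 SFmode elements select the
   AVX-512 form, subject to the ISA checks done by ix86_get_builtin.  */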
36131 static tree
36132 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36133 tree type_in)
36135 enum machine_mode in_mode, out_mode;
36136 int in_n, out_n;
36137 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36139 if (TREE_CODE (type_out) != VECTOR_TYPE
36140 || TREE_CODE (type_in) != VECTOR_TYPE
36141 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36142 return NULL_TREE;
36144 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36145 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36146 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36147 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36149 switch (fn)
36151 case BUILT_IN_SQRT:
36152 if (out_mode == DFmode && in_mode == DFmode)
36154 if (out_n == 2 && in_n == 2)
36155 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36156 else if (out_n == 4 && in_n == 4)
36157 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36158 else if (out_n == 8 && in_n == 8)
36159 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36161 break;
36163 case BUILT_IN_EXP2F:
36164 if (out_mode == SFmode && in_mode == SFmode)
36166 if (out_n == 16 && in_n == 16)
36167 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36169 break;
36171 case BUILT_IN_SQRTF:
36172 if (out_mode == SFmode && in_mode == SFmode)
36174 if (out_n == 4 && in_n == 4)
36175 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36176 else if (out_n == 8 && in_n == 8)
36177 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36178 else if (out_n == 16 && in_n == 16)
36179 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36181 break;
36183 case BUILT_IN_IFLOOR:
36184 case BUILT_IN_LFLOOR:
36185 case BUILT_IN_LLFLOOR:
36186 /* The round insn does not trap on denormals. */
36187 if (flag_trapping_math || !TARGET_ROUND)
36188 break;
36190 if (out_mode == SImode && in_mode == DFmode)
36192 if (out_n == 4 && in_n == 2)
36193 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36194 else if (out_n == 8 && in_n == 4)
36195 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36196 else if (out_n == 16 && in_n == 8)
36197 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36199 break;
36201 case BUILT_IN_IFLOORF:
36202 case BUILT_IN_LFLOORF:
36203 case BUILT_IN_LLFLOORF:
36204 /* The round insn does not trap on denormals. */
36205 if (flag_trapping_math || !TARGET_ROUND)
36206 break;
36208 if (out_mode == SImode && in_mode == SFmode)
36210 if (out_n == 4 && in_n == 4)
36211 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36212 else if (out_n == 8 && in_n == 8)
36213 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36215 break;
36217 case BUILT_IN_ICEIL:
36218 case BUILT_IN_LCEIL:
36219 case BUILT_IN_LLCEIL:
36220 /* The round insn does not trap on denormals. */
36221 if (flag_trapping_math || !TARGET_ROUND)
36222 break;
36224 if (out_mode == SImode && in_mode == DFmode)
36226 if (out_n == 4 && in_n == 2)
36227 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36228 else if (out_n == 8 && in_n == 4)
36229 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36230 else if (out_n == 16 && in_n == 8)
36231 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36233 break;
36235 case BUILT_IN_ICEILF:
36236 case BUILT_IN_LCEILF:
36237 case BUILT_IN_LLCEILF:
36238 /* The round insn does not trap on denormals. */
36239 if (flag_trapping_math || !TARGET_ROUND)
36240 break;
36242 if (out_mode == SImode && in_mode == SFmode)
36244 if (out_n == 4 && in_n == 4)
36245 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36246 else if (out_n == 8 && in_n == 8)
36247 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36249 break;
36251 case BUILT_IN_IRINT:
36252 case BUILT_IN_LRINT:
36253 case BUILT_IN_LLRINT:
36254 if (out_mode == SImode && in_mode == DFmode)
36256 if (out_n == 4 && in_n == 2)
36257 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36258 else if (out_n == 8 && in_n == 4)
36259 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36261 break;
36263 case BUILT_IN_IRINTF:
36264 case BUILT_IN_LRINTF:
36265 case BUILT_IN_LLRINTF:
36266 if (out_mode == SImode && in_mode == SFmode)
36268 if (out_n == 4 && in_n == 4)
36269 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36270 else if (out_n == 8 && in_n == 8)
36271 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36273 break;
36275 case BUILT_IN_IROUND:
36276 case BUILT_IN_LROUND:
36277 case BUILT_IN_LLROUND:
36278 /* The round insn does not trap on denormals. */
36279 if (flag_trapping_math || !TARGET_ROUND)
36280 break;
36282 if (out_mode == SImode && in_mode == DFmode)
36284 if (out_n == 4 && in_n == 2)
36285 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36286 else if (out_n == 8 && in_n == 4)
36287 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36288 else if (out_n == 16 && in_n == 8)
36289 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36291 break;
36293 case BUILT_IN_IROUNDF:
36294 case BUILT_IN_LROUNDF:
36295 case BUILT_IN_LLROUNDF:
36296 /* The round insn does not trap on denormals. */
36297 if (flag_trapping_math || !TARGET_ROUND)
36298 break;
36300 if (out_mode == SImode && in_mode == SFmode)
36302 if (out_n == 4 && in_n == 4)
36303 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36304 else if (out_n == 8 && in_n == 8)
36305 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36307 break;
36309 case BUILT_IN_COPYSIGN:
36310 if (out_mode == DFmode && in_mode == DFmode)
36312 if (out_n == 2 && in_n == 2)
36313 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36314 else if (out_n == 4 && in_n == 4)
36315 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36316 else if (out_n == 8 && in_n == 8)
36317 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36319 break;
36321 case BUILT_IN_COPYSIGNF:
36322 if (out_mode == SFmode && in_mode == SFmode)
36324 if (out_n == 4 && in_n == 4)
36325 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36326 else if (out_n == 8 && in_n == 8)
36327 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36328 else if (out_n == 16 && in_n == 16)
36329 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36331 break;
36333 case BUILT_IN_FLOOR:
36334 /* The round insn does not trap on denormals. */
36335 if (flag_trapping_math || !TARGET_ROUND)
36336 break;
36338 if (out_mode == DFmode && in_mode == DFmode)
36340 if (out_n == 2 && in_n == 2)
36341 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36342 else if (out_n == 4 && in_n == 4)
36343 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36345 break;
36347 case BUILT_IN_FLOORF:
36348 /* The round insn does not trap on denormals. */
36349 if (flag_trapping_math || !TARGET_ROUND)
36350 break;
36352 if (out_mode == SFmode && in_mode == SFmode)
36354 if (out_n == 4 && in_n == 4)
36355 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36356 else if (out_n == 8 && in_n == 8)
36357 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36359 break;
36361 case BUILT_IN_CEIL:
36362 /* The round insn does not trap on denormals. */
36363 if (flag_trapping_math || !TARGET_ROUND)
36364 break;
36366 if (out_mode == DFmode && in_mode == DFmode)
36368 if (out_n == 2 && in_n == 2)
36369 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36370 else if (out_n == 4 && in_n == 4)
36371 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36373 break;
36375 case BUILT_IN_CEILF:
36376 /* The round insn does not trap on denormals. */
36377 if (flag_trapping_math || !TARGET_ROUND)
36378 break;
36380 if (out_mode == SFmode && in_mode == SFmode)
36382 if (out_n == 4 && in_n == 4)
36383 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36384 else if (out_n == 8 && in_n == 8)
36385 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36387 break;
36389 case BUILT_IN_TRUNC:
36390 /* The round insn does not trap on denormals. */
36391 if (flag_trapping_math || !TARGET_ROUND)
36392 break;
36394 if (out_mode == DFmode && in_mode == DFmode)
36396 if (out_n == 2 && in_n == 2)
36397 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36398 else if (out_n == 4 && in_n == 4)
36399 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36401 break;
36403 case BUILT_IN_TRUNCF:
36404 /* The round insn does not trap on denormals. */
36405 if (flag_trapping_math || !TARGET_ROUND)
36406 break;
36408 if (out_mode == SFmode && in_mode == SFmode)
36410 if (out_n == 4 && in_n == 4)
36411 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36412 else if (out_n == 8 && in_n == 8)
36413 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36415 break;
36417 case BUILT_IN_RINT:
36418 /* The round insn does not trap on denormals. */
36419 if (flag_trapping_math || !TARGET_ROUND)
36420 break;
36422 if (out_mode == DFmode && in_mode == DFmode)
36424 if (out_n == 2 && in_n == 2)
36425 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36426 else if (out_n == 4 && in_n == 4)
36427 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36429 break;
36431 case BUILT_IN_RINTF:
36432 /* The round insn does not trap on denormals. */
36433 if (flag_trapping_math || !TARGET_ROUND)
36434 break;
36436 if (out_mode == SFmode && in_mode == SFmode)
36438 if (out_n == 4 && in_n == 4)
36439 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36440 else if (out_n == 8 && in_n == 8)
36441 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36443 break;
36445 case BUILT_IN_ROUND:
36446 /* The round insn does not trap on denormals. */
36447 if (flag_trapping_math || !TARGET_ROUND)
36448 break;
36450 if (out_mode == DFmode && in_mode == DFmode)
36452 if (out_n == 2 && in_n == 2)
36453 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36454 else if (out_n == 4 && in_n == 4)
36455 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36457 break;
36459 case BUILT_IN_ROUNDF:
36460 /* The round insn does not trap on denormals. */
36461 if (flag_trapping_math || !TARGET_ROUND)
36462 break;
36464 if (out_mode == SFmode && in_mode == SFmode)
36466 if (out_n == 4 && in_n == 4)
36467 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36468 else if (out_n == 8 && in_n == 8)
36469 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36471 break;
36473 case BUILT_IN_FMA:
36474 if (out_mode == DFmode && in_mode == DFmode)
36476 if (out_n == 2 && in_n == 2)
36477 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36478 if (out_n == 4 && in_n == 4)
36479 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36481 break;
36483 case BUILT_IN_FMAF:
36484 if (out_mode == SFmode && in_mode == SFmode)
36486 if (out_n == 4 && in_n == 4)
36487 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36488 if (out_n == 8 && in_n == 8)
36489 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36491 break;
36493 default:
36494 break;
36497 /* Dispatch to a handler for a vectorization library. */
36498 if (ix86_veclib_handler)
36499 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36500 type_in);
36502 return NULL_TREE;
36505 /* Handler for an SVML-style interface to
36506 a library with vectorized intrinsics. */
36508 static tree
36509 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36511 char name[20];
36512 tree fntype, new_fndecl, args;
36513 unsigned arity;
36514 const char *bname;
36515 enum machine_mode el_mode, in_mode;
36516 int n, in_n;
36518 /* SVML is suitable for unsafe math only. */
36519 if (!flag_unsafe_math_optimizations)
36520 return NULL_TREE;
36522 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36523 n = TYPE_VECTOR_SUBPARTS (type_out);
36524 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36525 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36526 if (el_mode != in_mode
36527 || n != in_n)
36528 return NULL_TREE;
36530 switch (fn)
36532 case BUILT_IN_EXP:
36533 case BUILT_IN_LOG:
36534 case BUILT_IN_LOG10:
36535 case BUILT_IN_POW:
36536 case BUILT_IN_TANH:
36537 case BUILT_IN_TAN:
36538 case BUILT_IN_ATAN:
36539 case BUILT_IN_ATAN2:
36540 case BUILT_IN_ATANH:
36541 case BUILT_IN_CBRT:
36542 case BUILT_IN_SINH:
36543 case BUILT_IN_SIN:
36544 case BUILT_IN_ASINH:
36545 case BUILT_IN_ASIN:
36546 case BUILT_IN_COSH:
36547 case BUILT_IN_COS:
36548 case BUILT_IN_ACOSH:
36549 case BUILT_IN_ACOS:
36550 if (el_mode != DFmode || n != 2)
36551 return NULL_TREE;
36552 break;
36554 case BUILT_IN_EXPF:
36555 case BUILT_IN_LOGF:
36556 case BUILT_IN_LOG10F:
36557 case BUILT_IN_POWF:
36558 case BUILT_IN_TANHF:
36559 case BUILT_IN_TANF:
36560 case BUILT_IN_ATANF:
36561 case BUILT_IN_ATAN2F:
36562 case BUILT_IN_ATANHF:
36563 case BUILT_IN_CBRTF:
36564 case BUILT_IN_SINHF:
36565 case BUILT_IN_SINF:
36566 case BUILT_IN_ASINHF:
36567 case BUILT_IN_ASINF:
36568 case BUILT_IN_COSHF:
36569 case BUILT_IN_COSF:
36570 case BUILT_IN_ACOSHF:
36571 case BUILT_IN_ACOSF:
36572 if (el_mode != SFmode || n != 4)
36573 return NULL_TREE;
36574 break;
36576 default:
36577 return NULL_TREE;
36580 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
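/* Build the SVML entry point name: "vmls"/"vmld" plus the scalar builtin
   name without its "__builtin_" prefix (hence bname + 10) plus the vector
   width, with the first letter of the function name capitalized below,
   e.g. sinf -> vmlsSin4 and sin -> vmldSin2.  */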
36582 if (fn == BUILT_IN_LOGF)
36583 strcpy (name, "vmlsLn4");
36584 else if (fn == BUILT_IN_LOG)
36585 strcpy (name, "vmldLn2");
36586 else if (n == 4)
36588 sprintf (name, "vmls%s", bname+10);
36589 name[strlen (name)-1] = '4';
36591 else
36592 sprintf (name, "vmld%s2", bname+10);
36594 /* Convert to uppercase. */
36595 name[4] &= ~0x20;
36597 arity = 0;
36598 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36599 args;
36600 args = TREE_CHAIN (args))
36601 arity++;
36603 if (arity == 1)
36604 fntype = build_function_type_list (type_out, type_in, NULL);
36605 else
36606 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36608 /* Build a function declaration for the vectorized function. */
36609 new_fndecl = build_decl (BUILTINS_LOCATION,
36610 FUNCTION_DECL, get_identifier (name), fntype);
36611 TREE_PUBLIC (new_fndecl) = 1;
36612 DECL_EXTERNAL (new_fndecl) = 1;
36613 DECL_IS_NOVOPS (new_fndecl) = 1;
36614 TREE_READONLY (new_fndecl) = 1;
36616 return new_fndecl;
36619 /* Handler for an ACML-style interface to
36620 a library with vectorized intrinsics. */
36622 static tree
36623 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36625 char name[20] = "__vr.._";
36626 tree fntype, new_fndecl, args;
36627 unsigned arity;
36628 const char *bname;
36629 enum machine_mode el_mode, in_mode;
36630 int n, in_n;
36632 /* ACML is 64-bit only and suitable for unsafe math only, as
36633 it does not correctly support parts of IEEE (such as denormals)
36634 with the required precision. */
36635 if (!TARGET_64BIT
36636 || !flag_unsafe_math_optimizations)
36637 return NULL_TREE;
36639 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36640 n = TYPE_VECTOR_SUBPARTS (type_out);
36641 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36642 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36643 if (el_mode != in_mode
36644 || n != in_n)
36645 return NULL_TREE;
36647 switch (fn)
36649 case BUILT_IN_SIN:
36650 case BUILT_IN_COS:
36651 case BUILT_IN_EXP:
36652 case BUILT_IN_LOG:
36653 case BUILT_IN_LOG2:
36654 case BUILT_IN_LOG10:
36655 name[4] = 'd';
36656 name[5] = '2';
36657 if (el_mode != DFmode
36658 || n != 2)
36659 return NULL_TREE;
36660 break;
36662 case BUILT_IN_SINF:
36663 case BUILT_IN_COSF:
36664 case BUILT_IN_EXPF:
36665 case BUILT_IN_POWF:
36666 case BUILT_IN_LOGF:
36667 case BUILT_IN_LOG2F:
36668 case BUILT_IN_LOG10F:
36669 name[4] = 's';
36670 name[5] = '4';
36671 if (el_mode != SFmode
36672 || n != 4)
36673 return NULL_TREE;
36674 break;
36676 default:
36677 return NULL_TREE;
36680 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
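/* ACML vector routines are named __vrd2_<func> and __vrs4_<func>; append
   the scalar builtin name without its "__builtin_" prefix (hence bname + 10)
   to the 7-character "__vr.._" template filled in above, e.g. sin ->
   __vrd2_sin and sinf -> __vrs4_sinf.  */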
36681 sprintf (name + 7, "%s", bname+10);
36683 arity = 0;
36684 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36685 args;
36686 args = TREE_CHAIN (args))
36687 arity++;
36689 if (arity == 1)
36690 fntype = build_function_type_list (type_out, type_in, NULL);
36691 else
36692 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36694 /* Build a function declaration for the vectorized function. */
36695 new_fndecl = build_decl (BUILTINS_LOCATION,
36696 FUNCTION_DECL, get_identifier (name), fntype);
36697 TREE_PUBLIC (new_fndecl) = 1;
36698 DECL_EXTERNAL (new_fndecl) = 1;
36699 DECL_IS_NOVOPS (new_fndecl) = 1;
36700 TREE_READONLY (new_fndecl) = 1;
36702 return new_fndecl;
36705 /* Returns a decl of a function that implements gather load with
36706 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36707 Return NULL_TREE if it is not available. */
36709 static tree
36710 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36711 const_tree index_type, int scale)
36713 bool si;
36714 enum ix86_builtins code;
36716 if (! TARGET_AVX2)
36717 return NULL_TREE;
36719 if ((TREE_CODE (index_type) != INTEGER_TYPE
36720 && !POINTER_TYPE_P (index_type))
36721 || (TYPE_MODE (index_type) != SImode
36722 && TYPE_MODE (index_type) != DImode))
36723 return NULL_TREE;
36725 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36726 return NULL_TREE;
36728 /* v*gather* insn sign extends index to pointer mode. */
36729 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36730 && TYPE_UNSIGNED (index_type))
36731 return NULL_TREE;
36733 if (scale <= 0
36734 || scale > 8
36735 || (scale & (scale - 1)) != 0)
36736 return NULL_TREE;
36738 si = TYPE_MODE (index_type) == SImode;
36739 switch (TYPE_MODE (mem_vectype))
36741 case V2DFmode:
36742 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36743 break;
36744 case V4DFmode:
36745 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36746 break;
36747 case V2DImode:
36748 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36749 break;
36750 case V4DImode:
36751 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36752 break;
36753 case V4SFmode:
36754 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36755 break;
36756 case V8SFmode:
36757 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36758 break;
36759 case V4SImode:
36760 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36761 break;
36762 case V8SImode:
36763 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36764 break;
36765 case V8DFmode:
36766 if (TARGET_AVX512F)
36767 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36768 else
36769 return NULL_TREE;
36770 break;
36771 case V8DImode:
36772 if (TARGET_AVX512F)
36773 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36774 else
36775 return NULL_TREE;
36776 break;
36777 case V16SFmode:
36778 if (TARGET_AVX512F)
36779 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36780 else
36781 return NULL_TREE;
36782 break;
36783 case V16SImode:
36784 if (TARGET_AVX512F)
36785 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36786 else
36787 return NULL_TREE;
36788 break;
36789 default:
36790 return NULL_TREE;
36793 return ix86_get_builtin (code);
36796 /* Returns a decl of a target-specific builtin that implements the
36797 reciprocal of the function, or NULL_TREE if it is not available. */
36799 static tree
36800 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36801 bool sqrt ATTRIBUTE_UNUSED)
36803 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36804 && flag_finite_math_only && !flag_trapping_math
36805 && flag_unsafe_math_optimizations))
36806 return NULL_TREE;
36808 if (md_fn)
36809 /* Machine dependent builtins. */
36810 switch (fn)
36812 /* Vectorized version of sqrt to rsqrt conversion. */
36813 case IX86_BUILTIN_SQRTPS_NR:
36814 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36816 case IX86_BUILTIN_SQRTPS_NR256:
36817 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36819 default:
36820 return NULL_TREE;
36822 else
36823 /* Normal builtins. */
36824 switch (fn)
36826 /* Sqrt to rsqrt conversion. */
36827 case BUILT_IN_SQRTF:
36828 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36830 default:
36831 return NULL_TREE;
36835 /* Helper for avx_vpermilps256_operand et al. This is also used by
36836 the expansion functions to turn the parallel back into a mask.
36837 The return value is 0 for no match and the imm8+1 for a match. */
36839 int
36840 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36842 unsigned i, nelt = GET_MODE_NUNITS (mode);
36843 unsigned mask = 0;
36844 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36846 if (XVECLEN (par, 0) != (int) nelt)
36847 return 0;
36849 /* Validate that all of the elements are constants, and not totally
36850 out of range. Copy the data into an integral array to make the
36851 subsequent checks easier. */
36852 for (i = 0; i < nelt; ++i)
36854 rtx er = XVECEXP (par, 0, i);
36855 unsigned HOST_WIDE_INT ei;
36857 if (!CONST_INT_P (er))
36858 return 0;
36859 ei = INTVAL (er);
36860 if (ei >= nelt)
36861 return 0;
36862 ipar[i] = ei;
36865 switch (mode)
36867 case V8DFmode:
36868 /* In the 512-bit DFmode case, we can only move elements within
36869 a 128-bit lane. First fill the second part of the mask,
36870 then fallthru. */
36871 for (i = 4; i < 6; ++i)
36873 if (ipar[i] < 4 || ipar[i] >= 6)
36874 return 0;
36875 mask |= (ipar[i] - 4) << i;
36877 for (i = 6; i < 8; ++i)
36879 if (ipar[i] < 6)
36880 return 0;
36881 mask |= (ipar[i] - 6) << i;
36883 /* FALLTHRU */
36885 case V4DFmode:
36886 /* In the 256-bit DFmode case, we can only move elements within
36887 a 128-bit lane. */
36888 for (i = 0; i < 2; ++i)
36890 if (ipar[i] >= 2)
36891 return 0;
36892 mask |= ipar[i] << i;
36894 for (i = 2; i < 4; ++i)
36896 if (ipar[i] < 2)
36897 return 0;
36898 mask |= (ipar[i] - 2) << i;
36900 break;
36902 case V16SFmode:
36903 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36904 must mirror the permutation in the lower 256 bits. */
36905 for (i = 0; i < 8; ++i)
36906 if (ipar[i] + 8 != ipar[i + 8])
36907 return 0;
36908 /* FALLTHRU */
36910 case V8SFmode:
36911 /* In the 256-bit SFmode case, we have full freedom of
36912 movement within the low 128-bit lane, but the high 128-bit
36913 lane must mirror the exact same pattern. */
36914 for (i = 0; i < 4; ++i)
36915 if (ipar[i] + 4 != ipar[i + 4])
36916 return 0;
36917 nelt = 4;
36918 /* FALLTHRU */
36920 case V2DFmode:
36921 case V4SFmode:
36922 /* In the 128-bit case, we've full freedom in the placement of
36923 the elements from the source operand. */
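/* Pack nelt/2 bits per element: one bit each for V2DFmode, two bits each
   for the SFmode cases (nelt was reduced to 4 above), matching the
   vpermilpd/vpermilps immediate encoding.  */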
36924 for (i = 0; i < nelt; ++i)
36925 mask |= ipar[i] << (i * (nelt / 2));
36926 break;
36928 default:
36929 gcc_unreachable ();
36932 /* Make sure success has a non-zero value by adding one. */
36933 return mask + 1;
36936 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36937 the expansion functions to turn the parallel back into a mask.
36938 The return value is 0 for no match and the imm8+1 for a match. */
36940 int
36941 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36943 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36944 unsigned mask = 0;
36945 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36947 if (XVECLEN (par, 0) != (int) nelt)
36948 return 0;
36950 /* Validate that all of the elements are constants, and not totally
36951 out of range. Copy the data into an integral array to make the
36952 subsequent checks easier. */
36953 for (i = 0; i < nelt; ++i)
36955 rtx er = XVECEXP (par, 0, i);
36956 unsigned HOST_WIDE_INT ei;
36958 if (!CONST_INT_P (er))
36959 return 0;
36960 ei = INTVAL (er);
36961 if (ei >= 2 * nelt)
36962 return 0;
36963 ipar[i] = ei;
36966 /* Validate that the halves of the permute are halves. */
36967 for (i = 0; i < nelt2 - 1; ++i)
36968 if (ipar[i] + 1 != ipar[i + 1])
36969 return 0;
36970 for (i = nelt2; i < nelt - 1; ++i)
36971 if (ipar[i] + 1 != ipar[i + 1])
36972 return 0;
36974 /* Reconstruct the mask. */
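/* Each 128-bit half of the destination selects one aligned 128-bit chunk
   of the source operands; its number is stored in bits 0-3 for the low
   half and bits 4-7 for the high half, as in the vperm2f128 immediate.  */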
36975 for (i = 0; i < 2; ++i)
36977 unsigned e = ipar[i * nelt2];
36978 if (e % nelt2)
36979 return 0;
36980 e /= nelt2;
36981 mask |= e << (i * 4);
36984 /* Make sure success has a non-zero value by adding one. */
36985 return mask + 1;
36988 /* Return a register priority for hard reg REGNO. */
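/* The bigger the priority, the more preferable the register is to the
   allocator when all other conditions are equal.  */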
36989 static int
36990 ix86_register_priority (int hard_regno)
36992 /* ebp and r13 as the base always want a displacement, and r12 as the
36993 base always wants an index. So discourage their usage in an
36994 address. */
36995 if (hard_regno == R12_REG || hard_regno == R13_REG)
36996 return 0;
36997 if (hard_regno == BP_REG)
36998 return 1;
36999 /* New x86-64 int registers result in bigger code size. Discourage
37000 them. */
37001 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37002 return 2;
37003 /* New x86-64 SSE registers result in bigger code size. Discourage
37004 them. */
37005 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37006 return 2;
37007 /* Usage of AX register results in smaller code. Prefer it. */
37008 if (hard_regno == 0)
37009 return 4;
37010 return 3;
37013 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37015 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37016 QImode must go into class Q_REGS.
37017 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37018 movdf to do mem-to-mem moves through integer regs. */
37020 static reg_class_t
37021 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37023 enum machine_mode mode = GET_MODE (x);
37025 /* We're only allowed to return a subclass of CLASS. Many of the
37026 following checks fail for NO_REGS, so eliminate that early. */
37027 if (regclass == NO_REGS)
37028 return NO_REGS;
37030 /* All classes can load zeros. */
37031 if (x == CONST0_RTX (mode))
37032 return regclass;
37034 /* Force constants into memory if we are loading a (nonzero) constant into
37035 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37036 instructions to load from a constant. */
37037 if (CONSTANT_P (x)
37038 && (MAYBE_MMX_CLASS_P (regclass)
37039 || MAYBE_SSE_CLASS_P (regclass)
37040 || MAYBE_MASK_CLASS_P (regclass)))
37041 return NO_REGS;
37043 /* Prefer SSE regs only, if we can use them for math. */
37044 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37045 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37047 /* Floating-point constants need more complex checks. */
37048 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37050 /* General regs can load everything. */
37051 if (reg_class_subset_p (regclass, GENERAL_REGS))
37052 return regclass;
37054 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37055 zero above. We only want to wind up preferring 80387 registers if
37056 we plan on doing computation with them. */
37057 if (TARGET_80387
37058 && standard_80387_constant_p (x) > 0)
37060 /* Limit class to non-sse. */
37061 if (regclass == FLOAT_SSE_REGS)
37062 return FLOAT_REGS;
37063 if (regclass == FP_TOP_SSE_REGS)
37064 return FP_TOP_REG;
37065 if (regclass == FP_SECOND_SSE_REGS)
37066 return FP_SECOND_REG;
37067 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37068 return regclass;
37071 return NO_REGS;
37074 /* Generally when we see PLUS here, it's the function invariant
37075 (plus soft-fp const_int), which can only be computed into general
37076 regs. */
37077 if (GET_CODE (x) == PLUS)
37078 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37080 /* QImode constants are easy to load, but non-constant QImode data
37081 must go into Q_REGS. */
37082 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37084 if (reg_class_subset_p (regclass, Q_REGS))
37085 return regclass;
37086 if (reg_class_subset_p (Q_REGS, regclass))
37087 return Q_REGS;
37088 return NO_REGS;
37091 return regclass;
37094 /* Discourage putting floating-point values in SSE registers unless
37095 SSE math is being used, and likewise for the 387 registers. */
37096 static reg_class_t
37097 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37099 enum machine_mode mode = GET_MODE (x);
37101 /* Restrict the output reload class to the register bank that we are doing
37102 math on. If we would like not to return a subset of CLASS, reject this
37103 alternative: if reload cannot do this, it will still use its choice. */
37104 mode = GET_MODE (x);
37105 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37106 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37108 if (X87_FLOAT_MODE_P (mode))
37110 if (regclass == FP_TOP_SSE_REGS)
37111 return FP_TOP_REG;
37112 else if (regclass == FP_SECOND_SSE_REGS)
37113 return FP_SECOND_REG;
37114 else
37115 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37118 return regclass;
37121 static reg_class_t
37122 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37123 enum machine_mode mode, secondary_reload_info *sri)
37125 /* Double-word spills from general registers to non-offsettable memory
37126 references (zero-extended addresses) require special handling. */
37127 if (TARGET_64BIT
37128 && MEM_P (x)
37129 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37130 && INTEGER_CLASS_P (rclass)
37131 && !offsettable_memref_p (x))
37133 sri->icode = (in_p
37134 ? CODE_FOR_reload_noff_load
37135 : CODE_FOR_reload_noff_store);
37136 /* Add the cost of moving address to a temporary. */
37137 sri->extra_cost = 1;
37139 return NO_REGS;
37142 /* QImode spills from non-QI registers require an
37143 intermediate register on 32-bit targets. */
37144 if (mode == QImode
37145 && (MAYBE_MASK_CLASS_P (rclass)
37146 || (!TARGET_64BIT && !in_p
37147 && INTEGER_CLASS_P (rclass)
37148 && MAYBE_NON_Q_CLASS_P (rclass))))
37150 int regno;
37152 if (REG_P (x))
37153 regno = REGNO (x);
37154 else
37155 regno = -1;
37157 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37158 regno = true_regnum (x);
37160 /* Return Q_REGS if the operand is in memory. */
37161 if (regno == -1)
37162 return Q_REGS;
37165 /* This condition handles corner case where an expression involving
37166 pointers gets vectorized. We're trying to use the address of a
37167 stack slot as a vector initializer.
37169 (set (reg:V2DI 74 [ vect_cst_.2 ])
37170 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37172 Eventually frame gets turned into sp+offset like this:
37174 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37175 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37176 (const_int 392 [0x188]))))
37178 That later gets turned into:
37180 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37181 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37182 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37184 We'll have the following reload recorded:
37186 Reload 0: reload_in (DI) =
37187 (plus:DI (reg/f:DI 7 sp)
37188 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37189 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37190 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37191 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37192 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37193 reload_reg_rtx: (reg:V2DI 22 xmm1)
37195 Which isn't going to work since SSE instructions can't handle scalar
37196 additions. Returning GENERAL_REGS forces the addition into integer
37197 register and reload can handle subsequent reloads without problems. */
37199 if (in_p && GET_CODE (x) == PLUS
37200 && SSE_CLASS_P (rclass)
37201 && SCALAR_INT_MODE_P (mode))
37202 return GENERAL_REGS;
37204 return NO_REGS;
37207 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37209 static bool
37210 ix86_class_likely_spilled_p (reg_class_t rclass)
37212 switch (rclass)
37214 case AREG:
37215 case DREG:
37216 case CREG:
37217 case BREG:
37218 case AD_REGS:
37219 case SIREG:
37220 case DIREG:
37221 case SSE_FIRST_REG:
37222 case FP_TOP_REG:
37223 case FP_SECOND_REG:
37224 return true;
37226 default:
37227 break;
37230 return false;
37233 /* If we are copying between general and FP registers, we need a memory
37234 location. The same is true for SSE and MMX registers.
37236 To optimize register_move_cost performance, allow inline variant.
37238 The macro can't work reliably when one of the CLASSES is a class containing
37239 registers from multiple units (SSE, MMX, integer). We avoid this by never
37240 combining those units in a single alternative in the machine description.
37241 Ensure that this constraint holds to avoid unexpected surprises.
37243 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37244 enforce these sanity checks. */
37246 static inline bool
37247 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37248 enum machine_mode mode, int strict)
37250 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37251 return false;
37252 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37253 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37254 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37255 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37256 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37257 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37259 gcc_assert (!strict || lra_in_progress);
37260 return true;
37263 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37264 return true;
37266 /* ??? This is a lie. We do have moves between mmx/general and between
37267 mmx/sse2. But by saying we need secondary memory we discourage the
37268 register allocator from using the mmx registers unless needed. */
37269 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37270 return true;
37272 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37274 /* SSE1 doesn't have any direct moves from other classes. */
37275 if (!TARGET_SSE2)
37276 return true;
37278 /* If the target says that inter-unit moves are more expensive
37279 than moving through memory, then don't generate them. */
37280 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37281 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37282 return true;
37284 /* Between SSE and general, we have moves no larger than word size. */
37285 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37286 return true;
37289 return false;
37292 bool
37293 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37294 enum machine_mode mode, int strict)
37296 return inline_secondary_memory_needed (class1, class2, mode, strict);
37299 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37301 On the 80386, this is the size of MODE in words,
37302 except in the FP regs, where a single reg is always enough. */
37304 static unsigned char
37305 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37307 if (MAYBE_INTEGER_CLASS_P (rclass))
37309 if (mode == XFmode)
37310 return (TARGET_64BIT ? 2 : 3);
37311 else if (mode == XCmode)
37312 return (TARGET_64BIT ? 4 : 6);
37313 else
37314 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37316 else
37318 if (COMPLEX_MODE_P (mode))
37319 return 2;
37320 else
37321 return 1;
37325 /* Return true if the registers in CLASS cannot represent the change from
37326 modes FROM to TO. */
37328 bool
37329 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37330 enum reg_class regclass)
37332 if (from == to)
37333 return false;
37335 /* x87 registers can't do subreg at all, as all values are reformatted
37336 to extended precision. */
37337 if (MAYBE_FLOAT_CLASS_P (regclass))
37338 return true;
37340 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37342 /* Vector registers do not support QI or HImode loads. If we don't
37343 disallow a change to these modes, reload will assume it's ok to
37344 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37345 the vec_dupv4hi pattern. */
37346 if (GET_MODE_SIZE (from) < 4)
37347 return true;
37349 /* Vector registers do not support subreg with nonzero offsets, which
37350 are otherwise valid for integer registers. Since we can't see
37351 whether we have a nonzero offset from here, prohibit all
37352 nonparadoxical subregs changing size. */
37353 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37354 return true;
37357 return false;
37360 /* Return the cost of moving data of mode M between a
37361 register and memory. A value of 2 is the default; this cost is
37362 relative to those in `REGISTER_MOVE_COST'.
37364 This function is used extensively by register_move_cost that is used to
37365 build tables at startup. Make it inline in this case.
37366 When IN is 2, return maximum of in and out move cost.
37368 If moving between registers and memory is more expensive than
37369 between two registers, you should define this macro to express the
37370 relative cost.
37372 Model also increased moving costs of QImode registers in non
37373 Q_REGS classes.
37374 */
37375 static inline int
37376 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37377 int in)
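/* IN is 0 for a store, 1 for a load, and 2 to return the maximum of the
   load and store costs.  */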
37379 int cost;
37380 if (FLOAT_CLASS_P (regclass))
37382 int index;
37383 switch (mode)
37385 case SFmode:
37386 index = 0;
37387 break;
37388 case DFmode:
37389 index = 1;
37390 break;
37391 case XFmode:
37392 index = 2;
37393 break;
37394 default:
37395 return 100;
37397 if (in == 2)
37398 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37399 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37401 if (SSE_CLASS_P (regclass))
37403 int index;
37404 switch (GET_MODE_SIZE (mode))
37406 case 4:
37407 index = 0;
37408 break;
37409 case 8:
37410 index = 1;
37411 break;
37412 case 16:
37413 index = 2;
37414 break;
37415 default:
37416 return 100;
37418 if (in == 2)
37419 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37420 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37422 if (MMX_CLASS_P (regclass))
37424 int index;
37425 switch (GET_MODE_SIZE (mode))
37427 case 4:
37428 index = 0;
37429 break;
37430 case 8:
37431 index = 1;
37432 break;
37433 default:
37434 return 100;
37436 if (in == 2)
37437 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37438 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37440 switch (GET_MODE_SIZE (mode))
37442 case 1:
37443 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37445 if (!in)
37446 return ix86_cost->int_store[0];
37447 if (TARGET_PARTIAL_REG_DEPENDENCY
37448 && optimize_function_for_speed_p (cfun))
37449 cost = ix86_cost->movzbl_load;
37450 else
37451 cost = ix86_cost->int_load[0];
37452 if (in == 2)
37453 return MAX (cost, ix86_cost->int_store[0]);
37454 return cost;
37456 else
37458 if (in == 2)
37459 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37460 if (in)
37461 return ix86_cost->movzbl_load;
37462 else
37463 return ix86_cost->int_store[0] + 4;
37465 break;
37466 case 2:
37467 if (in == 2)
37468 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37469 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37470 default:
37471 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37472 if (mode == TFmode)
37473 mode = XFmode;
37474 if (in == 2)
37475 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37476 else if (in)
37477 cost = ix86_cost->int_load[2];
37478 else
37479 cost = ix86_cost->int_store[2];
37480 return (cost * (((int) GET_MODE_SIZE (mode)
37481 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37485 static int
37486 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37487 bool in)
37489 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37493 /* Return the cost of moving data from a register in class CLASS1 to
37494 one in class CLASS2.
37496 It is not required that the cost always equal 2 when FROM is the same as TO;
37497 on some machines it is expensive to move between registers if they are not
37498 general registers. */
37500 static int
37501 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37502 reg_class_t class2_i)
37504 enum reg_class class1 = (enum reg_class) class1_i;
37505 enum reg_class class2 = (enum reg_class) class2_i;
37507 /* In case we require secondary memory, compute cost of the store followed
37508 by load. In order to avoid bad register allocation choices, we need
37509 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37511 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37513 int cost = 1;
37515 cost += inline_memory_move_cost (mode, class1, 2);
37516 cost += inline_memory_move_cost (mode, class2, 2);
37518 /* In case of copying from a general purpose register we may emit multiple
37519 stores followed by a single load, causing a memory size mismatch stall.
37520 Count this as an arbitrarily high cost of 20. */
37521 if (targetm.class_max_nregs (class1, mode)
37522 > targetm.class_max_nregs (class2, mode))
37523 cost += 20;
37525 /* In the case of FP/MMX moves, the registers actually overlap, and we
37526 have to switch modes in order to treat them differently. */
37527 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37528 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37529 cost += 20;
37531 return cost;
37534 /* Moves between SSE/MMX and integer unit are expensive. */
37535 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37536 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37538 /* ??? By keeping returned value relatively high, we limit the number
37539 of moves between integer and MMX/SSE registers for all targets.
37540 Additionally, high value prevents problem with x86_modes_tieable_p(),
37541 where integer modes in MMX/SSE registers are not tieable
37542 because of missing QImode and HImode moves to, from or between
37543 MMX/SSE registers. */
37544 return MAX (8, ix86_cost->mmxsse_to_integer);
37546 if (MAYBE_FLOAT_CLASS_P (class1))
37547 return ix86_cost->fp_move;
37548 if (MAYBE_SSE_CLASS_P (class1))
37549 return ix86_cost->sse_move;
37550 if (MAYBE_MMX_CLASS_P (class1))
37551 return ix86_cost->mmx_move;
37552 return 2;
37555 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37556 MODE. */
37558 bool
37559 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37561 /* Flags and only flags can only hold CCmode values. */
37562 if (CC_REGNO_P (regno))
37563 return GET_MODE_CLASS (mode) == MODE_CC;
37564 if (GET_MODE_CLASS (mode) == MODE_CC
37565 || GET_MODE_CLASS (mode) == MODE_RANDOM
37566 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37567 return false;
37568 if (STACK_REGNO_P (regno))
37569 return VALID_FP_MODE_P (mode);
37570 if (MASK_REGNO_P (regno))
37571 return VALID_MASK_REG_MODE (mode);
37572 if (SSE_REGNO_P (regno))
37574 /* We implement the move patterns for all vector modes into and
37575 out of SSE registers, even when no operation instructions
37576 are available. */
37578 /* For AVX-512 we allow, regardless of regno:
37579 - XI mode
37580 - any of 512-bit wide vector mode
37581 - any scalar mode. */
37582 if (TARGET_AVX512F
37583 && (mode == XImode
37584 || VALID_AVX512F_REG_MODE (mode)
37585 || VALID_AVX512F_SCALAR_MODE (mode)))
37586 return true;
37588 /* xmm16-xmm31 are only available for AVX-512. */
37589 if (EXT_REX_SSE_REGNO_P (regno))
37590 return false;
37592 /* OImode and AVX modes are available only when AVX is enabled. */
37593 return ((TARGET_AVX
37594 && VALID_AVX256_REG_OR_OI_MODE (mode))
37595 || VALID_SSE_REG_MODE (mode)
37596 || VALID_SSE2_REG_MODE (mode)
37597 || VALID_MMX_REG_MODE (mode)
37598 || VALID_MMX_REG_MODE_3DNOW (mode));
37600 if (MMX_REGNO_P (regno))
37602 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37603 so if the register is available at all, then we can move data of
37604 the given mode into or out of it. */
37605 return (VALID_MMX_REG_MODE (mode)
37606 || VALID_MMX_REG_MODE_3DNOW (mode));
37609 if (mode == QImode)
37611 /* Take care for QImode values - they can be in non-QI regs,
37612 but then they do cause partial register stalls. */
37613 if (ANY_QI_REGNO_P (regno))
37614 return true;
37615 if (!TARGET_PARTIAL_REG_STALL)
37616 return true;
37617 /* LRA checks if the hard register is OK for the given mode.
37618 QImode values can live in non-QI regs, so we allow all
37619 registers here. */
37620 if (lra_in_progress)
37621 return true;
37622 return !can_create_pseudo_p ();
37624 /* We handle both integer and floats in the general purpose registers. */
37625 else if (VALID_INT_MODE_P (mode))
37626 return true;
37627 else if (VALID_FP_MODE_P (mode))
37628 return true;
37629 else if (VALID_DFP_MODE_P (mode))
37630 return true;
37631 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37632 on to use that value in smaller contexts, this can easily force a
37633 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37634 supporting DImode, allow it. */
37635 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37636 return true;
37638 return false;
37641 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37642 tieable integer mode. */
37644 static bool
37645 ix86_tieable_integer_mode_p (enum machine_mode mode)
37647 switch (mode)
37649 case HImode:
37650 case SImode:
37651 return true;
37653 case QImode:
37654 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37656 case DImode:
37657 return TARGET_64BIT;
37659 default:
37660 return false;
37664 /* Return true if MODE1 is accessible in a register that can hold MODE2
37665 without copying. That is, all register classes that can hold MODE2
37666 can also hold MODE1. */
37668 bool
37669 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37671 if (mode1 == mode2)
37672 return true;
37674 if (ix86_tieable_integer_mode_p (mode1)
37675 && ix86_tieable_integer_mode_p (mode2))
37676 return true;
37678 /* MODE2 being XFmode implies fp stack or general regs, which means we
37679 can tie any smaller floating point modes to it. Note that we do not
37680 tie this with TFmode. */
37681 if (mode2 == XFmode)
37682 return mode1 == SFmode || mode1 == DFmode;
37684 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37685 that we can tie it with SFmode. */
37686 if (mode2 == DFmode)
37687 return mode1 == SFmode;
37689 /* If MODE2 is only appropriate for an SSE register, then tie with
37690 any other mode acceptable to SSE registers. */
37691 if (GET_MODE_SIZE (mode2) == 32
37692 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37693 return (GET_MODE_SIZE (mode1) == 32
37694 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37695 if (GET_MODE_SIZE (mode2) == 16
37696 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37697 return (GET_MODE_SIZE (mode1) == 16
37698 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37700 /* If MODE2 is appropriate for an MMX register, then tie
37701 with any other mode acceptable to MMX registers. */
37702 if (GET_MODE_SIZE (mode2) == 8
37703 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37704 return (GET_MODE_SIZE (mode1) == 8
37705 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37707 return false;
37710 /* Return the cost of moving between two registers of mode MODE. */
37712 static int
37713 ix86_set_reg_reg_cost (enum machine_mode mode)
37715 unsigned int units = UNITS_PER_WORD;
37717 switch (GET_MODE_CLASS (mode))
37719 default:
37720 break;
37722 case MODE_CC:
37723 units = GET_MODE_SIZE (CCmode);
37724 break;
37726 case MODE_FLOAT:
37727 if ((TARGET_SSE && mode == TFmode)
37728 || (TARGET_80387 && mode == XFmode)
37729 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37730 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37731 units = GET_MODE_SIZE (mode);
37732 break;
37734 case MODE_COMPLEX_FLOAT:
37735 if ((TARGET_SSE && mode == TCmode)
37736 || (TARGET_80387 && mode == XCmode)
37737 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37738 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37739 units = GET_MODE_SIZE (mode);
37740 break;
37742 case MODE_VECTOR_INT:
37743 case MODE_VECTOR_FLOAT:
37744 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37745 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37746 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37747 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37748 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37749 units = GET_MODE_SIZE (mode);
37752 /* Return the cost of moving between two registers of mode MODE,
37753 assuming that the move will be in pieces of at most UNITS bytes. */
37754 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37757 /* Compute a (partial) cost for rtx X. Return true if the complete
37758 cost has been computed, and false if subexpressions should be
37759 scanned. In either case, *TOTAL contains the cost result. */
37761 static bool
37762 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37763 bool speed)
37765 rtx mask;
37766 enum rtx_code code = (enum rtx_code) code_i;
37767 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37768 enum machine_mode mode = GET_MODE (x);
37769 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37771 switch (code)
37773 case SET:
37774 if (register_operand (SET_DEST (x), VOIDmode)
37775 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37777 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37778 return true;
37780 return false;
37782 case CONST_INT:
37783 case CONST:
37784 case LABEL_REF:
37785 case SYMBOL_REF:
37786 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37787 *total = 3;
37788 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37789 *total = 2;
37790 else if (flag_pic && SYMBOLIC_CONST (x)
37791 && (!TARGET_64BIT
37792 || (GET_CODE (x) != LABEL_REF
37793 && (GET_CODE (x) != SYMBOL_REF
37794 || !SYMBOL_REF_LOCAL_P (x)))))
37795 *total = 1;
37796 else
37797 *total = 0;
37798 return true;
37800 case CONST_DOUBLE:
37801 if (mode == VOIDmode)
37803 *total = 0;
37804 return true;
37806 switch (standard_80387_constant_p (x))
37808 case 1: /* 0.0 */
37809 *total = 1;
37810 return true;
37811 default: /* Other constants */
37812 *total = 2;
37813 return true;
37814 case 0:
37815 case -1:
37816 break;
37818 if (SSE_FLOAT_MODE_P (mode))
37820 case CONST_VECTOR:
37821 switch (standard_sse_constant_p (x))
37823 case 0:
37824 break;
37825 case 1: /* 0: xor eliminates false dependency */
37826 *total = 0;
37827 return true;
37828 default: /* -1: cmp contains false dependency */
37829 *total = 1;
37830 return true;
37833 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37834 it'll probably end up. Add a penalty for size. */
37835 *total = (COSTS_N_INSNS (1)
37836 + (flag_pic != 0 && !TARGET_64BIT)
37837 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37838 return true;
37840 case ZERO_EXTEND:
37841 /* The zero extension is often completely free on x86_64, so make
37842 it as cheap as possible. */
37843 if (TARGET_64BIT && mode == DImode
37844 && GET_MODE (XEXP (x, 0)) == SImode)
37845 *total = 1;
37846 else if (TARGET_ZERO_EXTEND_WITH_AND)
37847 *total = cost->add;
37848 else
37849 *total = cost->movzx;
37850 return false;
37852 case SIGN_EXTEND:
37853 *total = cost->movsx;
37854 return false;
37856 case ASHIFT:
37857 if (SCALAR_INT_MODE_P (mode)
37858 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37859 && CONST_INT_P (XEXP (x, 1)))
37861 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37862 if (value == 1)
37864 *total = cost->add;
37865 return false;
37867 if ((value == 2 || value == 3)
37868 && cost->lea <= cost->shift_const)
37870 *total = cost->lea;
37871 return false;
37874 /* FALLTHRU */
37876 case ROTATE:
37877 case ASHIFTRT:
37878 case LSHIFTRT:
37879 case ROTATERT:
37880 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37882 /* ??? Should be SSE vector operation cost. */
37883 /* At least for published AMD latencies, this really is the same
37884 as the latency for a simple fpu operation like fabs. */
37885 /* V*QImode is emulated with 1-11 insns. */
37886 if (mode == V16QImode || mode == V32QImode)
37888 int count = 11;
37889 if (TARGET_XOP && mode == V16QImode)
37891 /* For XOP we use vpshab, which requires a broadcast of the
37892 value to the variable shift insn. For constants this
37893 means a V16Q const in mem; even when we can perform the
37894 shift with one insn set the cost to prefer paddb. */
37895 if (CONSTANT_P (XEXP (x, 1)))
37897 *total = (cost->fabs
37898 + rtx_cost (XEXP (x, 0), code, 0, speed)
37899 + (speed ? 2 : COSTS_N_BYTES (16)));
37900 return true;
37902 count = 3;
37904 else if (TARGET_SSSE3)
37905 count = 7;
37906 *total = cost->fabs * count;
37908 else
37909 *total = cost->fabs;
37911 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37913 if (CONST_INT_P (XEXP (x, 1)))
37915 if (INTVAL (XEXP (x, 1)) > 32)
37916 *total = cost->shift_const + COSTS_N_INSNS (2);
37917 else
37918 *total = cost->shift_const * 2;
37920 else
37922 if (GET_CODE (XEXP (x, 1)) == AND)
37923 *total = cost->shift_var * 2;
37924 else
37925 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
37928 else
37930 if (CONST_INT_P (XEXP (x, 1)))
37931 *total = cost->shift_const;
37932 else if (GET_CODE (XEXP (x, 1)) == SUBREG
37933 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
37935 /* The AND here only truncates the shift count, so return the cost of a plain variable shift. */
37936 *total = cost->shift_var;
37937 return true;
37939 else
37940 *total = cost->shift_var;
37942 return false;
37944 case FMA:
37946 rtx sub;
37948 gcc_assert (FLOAT_MODE_P (mode));
37949 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
37951 /* ??? SSE scalar/vector cost should be used here. */
37952 /* ??? Bald assumption that fma has the same cost as fmul. */
37953 *total = cost->fmul;
37954 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
37956 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
37957 sub = XEXP (x, 0);
37958 if (GET_CODE (sub) == NEG)
37959 sub = XEXP (sub, 0);
37960 *total += rtx_cost (sub, FMA, 0, speed);
37962 sub = XEXP (x, 2);
37963 if (GET_CODE (sub) == NEG)
37964 sub = XEXP (sub, 0);
37965 *total += rtx_cost (sub, FMA, 2, speed);
37966 return true;
37969 case MULT:
37970 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
37972 /* ??? SSE scalar cost should be used here. */
37973 *total = cost->fmul;
37974 return false;
37976 else if (X87_FLOAT_MODE_P (mode))
37978 *total = cost->fmul;
37979 return false;
37981 else if (FLOAT_MODE_P (mode))
37983 /* ??? SSE vector cost should be used here. */
37984 *total = cost->fmul;
37985 return false;
37987 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37989 /* V*QImode is emulated with 7-13 insns. */
37990 if (mode == V16QImode || mode == V32QImode)
37992 int extra = 11;
37993 if (TARGET_XOP && mode == V16QImode)
37994 extra = 5;
37995 else if (TARGET_SSSE3)
37996 extra = 6;
37997 *total = cost->fmul * 2 + cost->fabs * extra;
37999 /* V*DImode is emulated with 5-8 insns. */
38000 else if (mode == V2DImode || mode == V4DImode)
38002 if (TARGET_XOP && mode == V2DImode)
38003 *total = cost->fmul * 2 + cost->fabs * 3;
38004 else
38005 *total = cost->fmul * 3 + cost->fabs * 5;
38007 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38008 insns, including two PMULUDQ. */
38009 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38010 *total = cost->fmul * 2 + cost->fabs * 5;
38011 else
38012 *total = cost->fmul;
38013 return false;
38015 else
38017 rtx op0 = XEXP (x, 0);
38018 rtx op1 = XEXP (x, 1);
38019 int nbits;
38020 if (CONST_INT_P (XEXP (x, 1)))
38022 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
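/* Count the set bits of the multiplier (clearing the lowest set bit on
each iteration); every set bit adds mult_bit to the cost below. */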
38023 for (nbits = 0; value != 0; value &= value - 1)
38024 nbits++;
38026 else
38027 /* This is arbitrary. */
38028 nbits = 7;
38030 /* Compute costs correctly for widening multiplication. */
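/* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI))) is a
single widening imul, so it should be costed in the narrower SImode
rather than in DImode. */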
38031 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38032 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38033 == GET_MODE_SIZE (mode))
38035 int is_mulwiden = 0;
38036 enum machine_mode inner_mode = GET_MODE (op0);
38038 if (GET_CODE (op0) == GET_CODE (op1))
38039 is_mulwiden = 1, op1 = XEXP (op1, 0);
38040 else if (CONST_INT_P (op1))
38042 if (GET_CODE (op0) == SIGN_EXTEND)
38043 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38044 == INTVAL (op1);
38045 else
38046 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38049 if (is_mulwiden)
38050 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38053 *total = (cost->mult_init[MODE_INDEX (mode)]
38054 + nbits * cost->mult_bit
38055 + rtx_cost (op0, outer_code, opno, speed)
38056 + rtx_cost (op1, outer_code, opno, speed));
38058 return true;
38061 case DIV:
38062 case UDIV:
38063 case MOD:
38064 case UMOD:
38065 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38066 /* ??? SSE cost should be used here. */
38067 *total = cost->fdiv;
38068 else if (X87_FLOAT_MODE_P (mode))
38069 *total = cost->fdiv;
38070 else if (FLOAT_MODE_P (mode))
38071 /* ??? SSE vector cost should be used here. */
38072 *total = cost->fdiv;
38073 else
38074 *total = cost->divide[MODE_INDEX (mode)];
38075 return false;
38077 case PLUS:
38078 if (GET_MODE_CLASS (mode) == MODE_INT
38079 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
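/* The three cases below recognize address-style computations that a single
lea can handle: reg*scale + reg + constant, reg*scale + operand, and
reg + reg + operand. */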
38081 if (GET_CODE (XEXP (x, 0)) == PLUS
38082 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38083 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38084 && CONSTANT_P (XEXP (x, 1)))
38086 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38087 if (val == 2 || val == 4 || val == 8)
38089 *total = cost->lea;
38090 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38091 outer_code, opno, speed);
38092 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38093 outer_code, opno, speed);
38094 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38095 return true;
38098 else if (GET_CODE (XEXP (x, 0)) == MULT
38099 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38101 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38102 if (val == 2 || val == 4 || val == 8)
38104 *total = cost->lea;
38105 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38106 outer_code, opno, speed);
38107 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38108 return true;
38111 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38113 *total = cost->lea;
38114 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38115 outer_code, opno, speed);
38116 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38117 outer_code, opno, speed);
38118 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38119 return true;
38122 /* FALLTHRU */
38124 case MINUS:
38125 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38127 /* ??? SSE cost should be used here. */
38128 *total = cost->fadd;
38129 return false;
38131 else if (X87_FLOAT_MODE_P (mode))
38133 *total = cost->fadd;
38134 return false;
38136 else if (FLOAT_MODE_P (mode))
38138 /* ??? SSE vector cost should be used here. */
38139 *total = cost->fadd;
38140 return false;
38142 /* FALLTHRU */
38144 case AND:
38145 case IOR:
38146 case XOR:
38147 if (GET_MODE_CLASS (mode) == MODE_INT
38148 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38150 *total = (cost->add * 2
38151 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38152 << (GET_MODE (XEXP (x, 0)) != DImode))
38153 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38154 << (GET_MODE (XEXP (x, 1)) != DImode)));
38155 return true;
38157 /* FALLTHRU */
38159 case NEG:
38160 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38162 /* ??? SSE cost should be used here. */
38163 *total = cost->fchs;
38164 return false;
38166 else if (X87_FLOAT_MODE_P (mode))
38168 *total = cost->fchs;
38169 return false;
38171 else if (FLOAT_MODE_P (mode))
38173 /* ??? SSE vector cost should be used here. */
38174 *total = cost->fchs;
38175 return false;
38177 /* FALLTHRU */
38179 case NOT:
38180 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38182 /* ??? Should be SSE vector operation cost. */
38183 /* At least for published AMD latencies, this really is the same
38184 as the latency for a simple fpu operation like fabs. */
38185 *total = cost->fabs;
38187 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38188 *total = cost->add * 2;
38189 else
38190 *total = cost->add;
38191 return false;
38193 case COMPARE:
38194 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38195 && XEXP (XEXP (x, 0), 1) == const1_rtx
38196 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38197 && XEXP (x, 1) == const0_rtx)
38199 /* This kind of construct is implemented using test[bwl].
38200 Treat it as if we had an AND. */
38201 *total = (cost->add
38202 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38203 + rtx_cost (const1_rtx, outer_code, opno, speed));
38204 return true;
38206 return false;
38208 case FLOAT_EXTEND:
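/* Without SSE math this is done by the x87 load, which converts to
extended precision anyway, so the extension itself is free. */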
38209 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38210 *total = 0;
38211 return false;
38213 case ABS:
38214 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38215 /* ??? SSE cost should be used here. */
38216 *total = cost->fabs;
38217 else if (X87_FLOAT_MODE_P (mode))
38218 *total = cost->fabs;
38219 else if (FLOAT_MODE_P (mode))
38220 /* ??? SSE vector cost should be used here. */
38221 *total = cost->fabs;
38222 return false;
38224 case SQRT:
38225 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38226 /* ??? SSE cost should be used here. */
38227 *total = cost->fsqrt;
38228 else if (X87_FLOAT_MODE_P (mode))
38229 *total = cost->fsqrt;
38230 else if (FLOAT_MODE_P (mode))
38231 /* ??? SSE vector cost should be used here. */
38232 *total = cost->fsqrt;
38233 return false;
38235 case UNSPEC:
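/* UNSPEC_TP is the TLS thread pointer; it is read through a %fs/%gs
segment override, which costs nothing extra. */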
38236 if (XINT (x, 1) == UNSPEC_TP)
38237 *total = 0;
38238 return false;
38240 case VEC_SELECT:
38241 case VEC_CONCAT:
38242 case VEC_DUPLICATE:
38243 /* ??? Assume all of these vector manipulation patterns are
38244 recognizable, in which case they all have pretty much the
38245 same cost. */
38246 *total = cost->fabs;
38247 return true;
38248 case VEC_MERGE:
38249 mask = XEXP (x, 2);
38250 /* This is a masked instruction; assume the same cost
38251 as the non-masked variant. */
38252 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38253 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38254 else
38255 *total = cost->fabs;
38256 return true;
38258 default:
38259 return false;
38263 #if TARGET_MACHO
38265 static int current_machopic_label_num;
38267 /* Given a symbol name and its associated stub, write out the
38268 definition of the stub. */
38270 void
38271 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38273 unsigned int length;
38274 char *binder_name, *symbol_name, lazy_ptr_name[32];
38275 int label = ++current_machopic_label_num;
38277 /* For 64-bit we shouldn't get here. */
38278 gcc_assert (!TARGET_64BIT);
38280 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38281 symb = targetm.strip_name_encoding (symb);
38283 length = strlen (stub);
38284 binder_name = XALLOCAVEC (char, length + 32);
38285 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38287 length = strlen (symb);
38288 symbol_name = XALLOCAVEC (char, length + 32);
38289 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38291 sprintf (lazy_ptr_name, "L%d$lz", label);
38293 if (MACHOPIC_ATT_STUB)
38294 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38295 else if (MACHOPIC_PURE)
38296 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38297 else
38298 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38300 fprintf (file, "%s:\n", stub);
38301 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38303 if (MACHOPIC_ATT_STUB)
38305 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38307 else if (MACHOPIC_PURE)
38309 /* PIC stub. */
38310 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38311 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38312 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38313 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38314 label, lazy_ptr_name, label);
38315 fprintf (file, "\tjmp\t*%%ecx\n");
38317 else
38318 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38320 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38321 it needs no stub-binding-helper. */
38322 if (MACHOPIC_ATT_STUB)
38323 return;
38325 fprintf (file, "%s:\n", binder_name);
38327 if (MACHOPIC_PURE)
38329 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38330 fprintf (file, "\tpushl\t%%ecx\n");
38332 else
38333 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38335 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38337 /* N.B. Keep the correspondence of these
38338 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38339 old-pic/new-pic/non-pic stubs; altering this will break
38340 compatibility with existing dylibs. */
38341 if (MACHOPIC_PURE)
38343 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38344 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38346 else
38347 /* 16-byte -mdynamic-no-pic stub. */
38348 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38350 fprintf (file, "%s:\n", lazy_ptr_name);
38351 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38352 fprintf (file, ASM_LONG "%s\n", binder_name);
38354 #endif /* TARGET_MACHO */
38356 /* Order the registers for register allocator. */
38358 void
38359 x86_order_regs_for_local_alloc (void)
38361 int pos = 0;
38362 int i;
38364 /* First allocate the local general purpose registers. */
38365 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38366 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38367 reg_alloc_order [pos++] = i;
38369 /* Global general purpose registers. */
38370 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38371 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38372 reg_alloc_order [pos++] = i;
38374 /* x87 registers come first in case we are doing FP math
38375 using them. */
38376 if (!TARGET_SSE_MATH)
38377 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38378 reg_alloc_order [pos++] = i;
38380 /* SSE registers. */
38381 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38382 reg_alloc_order [pos++] = i;
38383 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38384 reg_alloc_order [pos++] = i;
38386 /* Extended REX SSE registers. */
38387 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38388 reg_alloc_order [pos++] = i;
38390 /* Mask register. */
38391 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38392 reg_alloc_order [pos++] = i;
38394 /* x87 registers. */
38395 if (TARGET_SSE_MATH)
38396 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38397 reg_alloc_order [pos++] = i;
38399 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38400 reg_alloc_order [pos++] = i;
38402 /* Initialize the rest of the array as we do not allocate some registers
38403 at all. */
38404 while (pos < FIRST_PSEUDO_REGISTER)
38405 reg_alloc_order [pos++] = 0;
38408 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38409 in struct attribute_spec handler. */
38410 static tree
38411 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38412 tree args,
38413 int flags ATTRIBUTE_UNUSED,
38414 bool *no_add_attrs)
38416 if (TREE_CODE (*node) != FUNCTION_TYPE
38417 && TREE_CODE (*node) != METHOD_TYPE
38418 && TREE_CODE (*node) != FIELD_DECL
38419 && TREE_CODE (*node) != TYPE_DECL)
38421 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38422 name);
38423 *no_add_attrs = true;
38424 return NULL_TREE;
38426 if (TARGET_64BIT)
38428 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38429 name);
38430 *no_add_attrs = true;
38431 return NULL_TREE;
38433 if (is_attribute_p ("callee_pop_aggregate_return", name))
38435 tree cst;
38437 cst = TREE_VALUE (args);
38438 if (TREE_CODE (cst) != INTEGER_CST)
38440 warning (OPT_Wattributes,
38441 "%qE attribute requires an integer constant argument",
38442 name);
38443 *no_add_attrs = true;
38445 else if (compare_tree_int (cst, 0) != 0
38446 && compare_tree_int (cst, 1) != 0)
38448 warning (OPT_Wattributes,
38449 "argument to %qE attribute is neither zero, nor one",
38450 name);
38451 *no_add_attrs = true;
38454 return NULL_TREE;
38457 return NULL_TREE;
38460 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38461 struct attribute_spec.handler. */
38462 static tree
38463 ix86_handle_abi_attribute (tree *node, tree name,
38464 tree args ATTRIBUTE_UNUSED,
38465 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38467 if (TREE_CODE (*node) != FUNCTION_TYPE
38468 && TREE_CODE (*node) != METHOD_TYPE
38469 && TREE_CODE (*node) != FIELD_DECL
38470 && TREE_CODE (*node) != TYPE_DECL)
38472 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38473 name);
38474 *no_add_attrs = true;
38475 return NULL_TREE;
38478 /* Can combine regparm with all attributes but fastcall. */
38479 if (is_attribute_p ("ms_abi", name))
38481 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38483 error ("ms_abi and sysv_abi attributes are not compatible");
38486 return NULL_TREE;
38488 else if (is_attribute_p ("sysv_abi", name))
38490 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38492 error ("ms_abi and sysv_abi attributes are not compatible");
38495 return NULL_TREE;
38498 return NULL_TREE;
38501 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38502 struct attribute_spec.handler. */
38503 static tree
38504 ix86_handle_struct_attribute (tree *node, tree name,
38505 tree args ATTRIBUTE_UNUSED,
38506 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38508 tree *type = NULL;
38509 if (DECL_P (*node))
38511 if (TREE_CODE (*node) == TYPE_DECL)
38512 type = &TREE_TYPE (*node);
38514 else
38515 type = node;
38517 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38519 warning (OPT_Wattributes, "%qE attribute ignored",
38520 name);
38521 *no_add_attrs = true;
38524 else if ((is_attribute_p ("ms_struct", name)
38525 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38526 || ((is_attribute_p ("gcc_struct", name)
38527 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38529 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38530 name);
38531 *no_add_attrs = true;
38534 return NULL_TREE;
38537 static tree
38538 ix86_handle_fndecl_attribute (tree *node, tree name,
38539 tree args ATTRIBUTE_UNUSED,
38540 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38542 if (TREE_CODE (*node) != FUNCTION_DECL)
38544 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38545 name);
38546 *no_add_attrs = true;
38548 return NULL_TREE;
38551 static bool
38552 ix86_ms_bitfield_layout_p (const_tree record_type)
38554 return ((TARGET_MS_BITFIELD_LAYOUT
38555 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38556 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38559 /* Returns an expression indicating where the this parameter is
38560 located on entry to the FUNCTION. */
38562 static rtx
38563 x86_this_parameter (tree function)
38565 tree type = TREE_TYPE (function);
38566 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38567 int nregs;
38569 if (TARGET_64BIT)
38571 const int *parm_regs;
38573 if (ix86_function_type_abi (type) == MS_ABI)
38574 parm_regs = x86_64_ms_abi_int_parameter_registers;
38575 else
38576 parm_regs = x86_64_int_parameter_registers;
38577 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38580 nregs = ix86_function_regparm (type, function);
38582 if (nregs > 0 && !stdarg_p (type))
38584 int regno;
38585 unsigned int ccvt = ix86_get_callcvt (type);
38587 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38588 regno = aggr ? DX_REG : CX_REG;
38589 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38591 regno = CX_REG;
38592 if (aggr)
38593 return gen_rtx_MEM (SImode,
38594 plus_constant (Pmode, stack_pointer_rtx, 4));
38596 else
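/* With regparm, `this' is normally the first argument register (%eax).
If the return value is an aggregate returned via a hidden pointer, that
pointer occupies %eax, so `this' moves to %edx, or to the stack when
only one register is available for arguments. */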
38598 regno = AX_REG;
38599 if (aggr)
38601 regno = DX_REG;
38602 if (nregs == 1)
38603 return gen_rtx_MEM (SImode,
38604 plus_constant (Pmode,
38605 stack_pointer_rtx, 4));
38608 return gen_rtx_REG (SImode, regno);
38611 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38612 aggr ? 8 : 4));
38615 /* Determine whether x86_output_mi_thunk can succeed. */
38617 static bool
38618 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38619 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38620 HOST_WIDE_INT vcall_offset, const_tree function)
38622 /* 64-bit can handle anything. */
38623 if (TARGET_64BIT)
38624 return true;
38626 /* For 32-bit, everything's fine if we have one free register. */
38627 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38628 return true;
38630 /* Need a free register for vcall_offset. */
38631 if (vcall_offset)
38632 return false;
38634 /* Need a free register for GOT references. */
38635 if (flag_pic && !targetm.binds_local_p (function))
38636 return false;
38638 /* Otherwise ok. */
38639 return true;
38642 /* Output the assembler code for a thunk function. THUNK_DECL is the
38643 declaration for the thunk function itself, FUNCTION is the decl for
38644 the target function. DELTA is an immediate constant offset to be
38645 added to THIS. If VCALL_OFFSET is nonzero, the word at
38646 *(*this + vcall_offset) should be added to THIS. */
38648 static void
38649 x86_output_mi_thunk (FILE *file,
38650 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38651 HOST_WIDE_INT vcall_offset, tree function)
38653 rtx this_param = x86_this_parameter (function);
38654 rtx this_reg, tmp, fnaddr;
38655 unsigned int tmp_regno;
38657 if (TARGET_64BIT)
38658 tmp_regno = R10_REG;
38659 else
38661 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38662 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38663 tmp_regno = AX_REG;
38664 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38665 tmp_regno = DX_REG;
38666 else
38667 tmp_regno = CX_REG;
38670 emit_note (NOTE_INSN_PROLOGUE_END);
38672 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38673 pull it in now and let DELTA benefit. */
38674 if (REG_P (this_param))
38675 this_reg = this_param;
38676 else if (vcall_offset)
38678 /* Put the this parameter into %eax. */
38679 this_reg = gen_rtx_REG (Pmode, AX_REG);
38680 emit_move_insn (this_reg, this_param);
38682 else
38683 this_reg = NULL_RTX;
38685 /* Adjust the this parameter by a fixed constant. */
38686 if (delta)
38688 rtx delta_rtx = GEN_INT (delta);
38689 rtx delta_dst = this_reg ? this_reg : this_param;
38691 if (TARGET_64BIT)
38693 if (!x86_64_general_operand (delta_rtx, Pmode))
38695 tmp = gen_rtx_REG (Pmode, tmp_regno);
38696 emit_move_insn (tmp, delta_rtx);
38697 delta_rtx = tmp;
38701 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38704 /* Adjust the this parameter by a value stored in the vtable. */
38705 if (vcall_offset)
38707 rtx vcall_addr, vcall_mem, this_mem;
38709 tmp = gen_rtx_REG (Pmode, tmp_regno);
38711 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38712 if (Pmode != ptr_mode)
38713 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38714 emit_move_insn (tmp, this_mem);
38716 /* Adjust the this parameter. */
38717 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38718 if (TARGET_64BIT
38719 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38721 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38722 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38723 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38726 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38727 if (Pmode != ptr_mode)
38728 emit_insn (gen_addsi_1_zext (this_reg,
38729 gen_rtx_REG (ptr_mode,
38730 REGNO (this_reg)),
38731 vcall_mem));
38732 else
38733 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38736 /* If necessary, drop THIS back to its stack slot. */
38737 if (this_reg && this_reg != this_param)
38738 emit_move_insn (this_param, this_reg);
38740 fnaddr = XEXP (DECL_RTL (function), 0);
38741 if (TARGET_64BIT)
38743 if (!flag_pic || targetm.binds_local_p (function)
38744 || TARGET_PECOFF)
38746 else
38748 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38749 tmp = gen_rtx_CONST (Pmode, tmp);
38750 fnaddr = gen_const_mem (Pmode, tmp);
38753 else
38755 if (!flag_pic || targetm.binds_local_p (function))
38757 #if TARGET_MACHO
38758 else if (TARGET_MACHO)
38760 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38761 fnaddr = XEXP (fnaddr, 0);
38763 #endif /* TARGET_MACHO */
38764 else
38766 tmp = gen_rtx_REG (Pmode, CX_REG);
38767 output_set_got (tmp, NULL_RTX);
38769 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38770 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38771 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38772 fnaddr = gen_const_mem (Pmode, fnaddr);
38776 /* Our sibling call patterns do not allow memories, because we have no
38777 predicate that can distinguish between frame and non-frame memory.
38778 For our purposes here, we can get away with (ab)using a jump pattern,
38779 because we're going to do no optimization. */
38780 if (MEM_P (fnaddr))
38781 emit_jump_insn (gen_indirect_jump (fnaddr));
38782 else
38784 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38785 fnaddr = legitimize_pic_address (fnaddr,
38786 gen_rtx_REG (Pmode, tmp_regno));
38788 if (!sibcall_insn_operand (fnaddr, word_mode))
38790 tmp = gen_rtx_REG (word_mode, tmp_regno);
38791 if (GET_MODE (fnaddr) != word_mode)
38792 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38793 emit_move_insn (tmp, fnaddr);
38794 fnaddr = tmp;
38797 tmp = gen_rtx_MEM (QImode, fnaddr);
38798 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38799 tmp = emit_call_insn (tmp);
38800 SIBLING_CALL_P (tmp) = 1;
38802 emit_barrier ();
38804 /* Emit just enough of rest_of_compilation to get the insns emitted.
38805 Note that use_thunk calls assemble_start_function et al. */
38806 tmp = get_insns ();
38807 shorten_branches (tmp);
38808 final_start_function (tmp, file, 1);
38809 final (tmp, file, 1);
38810 final_end_function ();
38813 static void
38814 x86_file_start (void)
38816 default_file_start ();
38817 if (TARGET_16BIT)
38818 fputs ("\t.code16gcc\n", asm_out_file);
38819 #if TARGET_MACHO
38820 darwin_file_start ();
38821 #endif
38822 if (X86_FILE_START_VERSION_DIRECTIVE)
38823 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38824 if (X86_FILE_START_FLTUSED)
38825 fputs ("\t.global\t__fltused\n", asm_out_file);
38826 if (ix86_asm_dialect == ASM_INTEL)
38827 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
int
38831 x86_field_alignment (tree field, int computed)
38833 enum machine_mode mode;
38834 tree type = TREE_TYPE (field);
38836 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38837 return computed;
38838 mode = TYPE_MODE (strip_array_types (type));
38839 if (mode == DFmode || mode == DCmode
38840 || GET_MODE_CLASS (mode) == MODE_INT
38841 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38842 return MIN (32, computed);
38843 return computed;
38846 /* Output assembler code to FILE to increment profiler label # LABELNO
38847 for profiling a function entry. */
38848 void
38849 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38851 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38852 : MCOUNT_NAME);
38854 if (TARGET_64BIT)
38856 #ifndef NO_PROFILE_COUNTERS
38857 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38858 #endif
38860 if (!TARGET_PECOFF && flag_pic)
38861 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38862 else
38863 fprintf (file, "\tcall\t%s\n", mcount_name);
38865 else if (flag_pic)
38867 #ifndef NO_PROFILE_COUNTERS
38868 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38869 LPREFIX, labelno);
38870 #endif
38871 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38873 else
38875 #ifndef NO_PROFILE_COUNTERS
38876 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38877 LPREFIX, labelno);
38878 #endif
38879 fprintf (file, "\tcall\t%s\n", mcount_name);
38883 /* We don't have exact information about the insn sizes, but we may assume
38884 quite safely that we are informed about all 1-byte insns and memory
38885 address sizes. This is enough to eliminate unnecessary padding in
38886 99% of cases. */
38888 static int
38889 min_insn_size (rtx insn)
38891 int l = 0, len;
38893 if (!INSN_P (insn) || !active_insn_p (insn))
38894 return 0;
38896 /* Discard alignments we've emitted, and jump instructions. */
38897 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38898 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38899 return 0;
38901 /* Important case - calls are always 5 bytes.
38902 It is common to have many calls in a row. */
38903 if (CALL_P (insn)
38904 && symbolic_reference_mentioned_p (PATTERN (insn))
38905 && !SIBLING_CALL_P (insn))
38906 return 5;
38907 len = get_attr_length (insn);
38908 if (len <= 1)
38909 return 1;
38911 /* For normal instructions we rely on get_attr_length being exact,
38912 with a few exceptions. */
38913 if (!JUMP_P (insn))
38915 enum attr_type type = get_attr_type (insn);
38917 switch (type)
38919 case TYPE_MULTI:
38920 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
38921 || asm_noperands (PATTERN (insn)) >= 0)
38922 return 0;
38923 break;
38924 case TYPE_OTHER:
38925 case TYPE_FCMP:
38926 break;
38927 default:
38928 /* Otherwise trust get_attr_length. */
38929 return len;
38932 l = get_attr_length_address (insn);
38933 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
38934 l = 4;
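/* Estimate one opcode byte plus the address bytes; with no address bytes
at all, assume a two-byte instruction. */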
38936 if (l)
38937 return 1+l;
38938 else
38939 return 2;
38942 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
38944 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
38945 window. */
38947 static void
38948 ix86_avoid_jump_mispredicts (void)
38950 rtx insn, start = get_insns ();
38951 int nbytes = 0, njumps = 0;
38952 int isjump = 0;
38954 /* Look for all minimal intervals of instructions containing 4 jumps.
38955 The intervals are bounded by START and INSN. NBYTES is the total
38956 size of instructions in the interval including INSN and not including
38957 START. When NBYTES is smaller than 16 bytes, it is possible
38958 that the end of START and INSN end up in the same 16-byte page.
38960 The smallest offset in the page at which INSN can start is the case where
38961 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
38962 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
38964 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
38965 have to, since control transfer to its label(s) can be performed through other
38966 means, and we also estimate the minimum length of all asm stmts as 0. */
38967 for (insn = start; insn; insn = NEXT_INSN (insn))
38969 int min_size;
38971 if (LABEL_P (insn))
38973 int align = label_to_alignment (insn);
38974 int max_skip = label_to_max_skip (insn);
38976 if (max_skip > 15)
38977 max_skip = 15;
38978 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
38979 already in the current 16 byte page, because otherwise
38980 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
38981 bytes to reach 16 byte boundary. */
38982 if (align <= 0
38983 || (align <= 3 && max_skip != (1 << align) - 1))
38984 max_skip = 0;
38985 if (dump_file)
38986 fprintf (dump_file, "Label %i with max_skip %i\n",
38987 INSN_UID (insn), max_skip);
38988 if (max_skip)
38990 while (nbytes + max_skip >= 16)
38992 start = NEXT_INSN (start);
38993 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
38994 || CALL_P (start))
38995 njumps--, isjump = 1;
38996 else
38997 isjump = 0;
38998 nbytes -= min_insn_size (start);
39001 continue;
39004 min_size = min_insn_size (insn);
39005 nbytes += min_size;
39006 if (dump_file)
39007 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39008 INSN_UID (insn), min_size);
39009 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39010 || CALL_P (insn))
39011 njumps++;
39012 else
39013 continue;
39015 while (njumps > 3)
39017 start = NEXT_INSN (start);
39018 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39019 || CALL_P (start))
39020 njumps--, isjump = 1;
39021 else
39022 isjump = 0;
39023 nbytes -= min_insn_size (start);
39025 gcc_assert (njumps >= 0);
39026 if (dump_file)
39027 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39028 INSN_UID (start), INSN_UID (insn), nbytes);
39030 if (njumps == 3 && isjump && nbytes < 16)
39032 int padsize = 15 - nbytes + min_insn_size (insn);
39034 if (dump_file)
39035 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39036 INSN_UID (insn), padsize);
39037 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39041 #endif
39043 /* AMD Athlon works faster
39044 when RET is not the destination of a conditional jump or directly preceded
39045 by another jump instruction. We avoid the penalty by inserting a NOP just
39046 before the RET instructions in such cases. */
39047 static void
39048 ix86_pad_returns (void)
39050 edge e;
39051 edge_iterator ei;
39053 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39055 basic_block bb = e->src;
39056 rtx ret = BB_END (bb);
39057 rtx prev;
39058 bool replace = false;
39060 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39061 || optimize_bb_for_size_p (bb))
39062 continue;
39063 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39064 if (active_insn_p (prev) || LABEL_P (prev))
39065 break;
39066 if (prev && LABEL_P (prev))
39068 edge e;
39069 edge_iterator ei;
39071 FOR_EACH_EDGE (e, ei, bb->preds)
39072 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39073 && !(e->flags & EDGE_FALLTHRU))
39075 replace = true;
39076 break;
39079 if (!replace)
39081 prev = prev_active_insn (ret);
39082 if (prev
39083 && ((JUMP_P (prev) && any_condjump_p (prev))
39084 || CALL_P (prev)))
39085 replace = true;
39086 /* Empty functions get a branch mispredict even when
39087 the jump destination is not visible to us. */
39088 if (!prev && !optimize_function_for_size_p (cfun))
39089 replace = true;
39091 if (replace)
39093 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39094 delete_insn (ret);
39099 /* Count the minimum number of instructions in BB. Return 4 if the
39100 number of instructions >= 4. */
39102 static int
39103 ix86_count_insn_bb (basic_block bb)
39105 rtx insn;
39106 int insn_count = 0;
39108 /* Count number of instructions in this block. Return 4 if the number
39109 of instructions >= 4. */
39110 FOR_BB_INSNS (bb, insn)
39112 /* This only happens in exit blocks. */
39113 if (JUMP_P (insn)
39114 && ANY_RETURN_P (PATTERN (insn)))
39115 break;
39117 if (NONDEBUG_INSN_P (insn)
39118 && GET_CODE (PATTERN (insn)) != USE
39119 && GET_CODE (PATTERN (insn)) != CLOBBER)
39121 insn_count++;
39122 if (insn_count >= 4)
39123 return insn_count;
39127 return insn_count;
39131 /* Count the minimum number of instructions in a code path through BB.
39132 Return 4 if the number of instructions >= 4. */
39134 static int
39135 ix86_count_insn (basic_block bb)
39137 edge e;
39138 edge_iterator ei;
39139 int min_prev_count;
39141 /* Only bother counting instructions along paths with no
39142 more than 2 basic blocks between entry and exit. Given
39143 that BB has an edge to exit, determine if a predecessor
39144 of BB has an edge from entry. If so, compute the number
39145 of instructions in the predecessor block. If there
39146 happen to be multiple such blocks, compute the minimum. */
39147 min_prev_count = 4;
39148 FOR_EACH_EDGE (e, ei, bb->preds)
39150 edge prev_e;
39151 edge_iterator prev_ei;
39153 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39155 min_prev_count = 0;
39156 break;
39158 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39160 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39162 int count = ix86_count_insn_bb (e->src);
39163 if (count < min_prev_count)
39164 min_prev_count = count;
39165 break;
39170 if (min_prev_count < 4)
39171 min_prev_count += ix86_count_insn_bb (bb);
39173 return min_prev_count;
39176 /* Pad short functions to 4 instructions. */
39178 static void
39179 ix86_pad_short_function (void)
39181 edge e;
39182 edge_iterator ei;
39184 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39186 rtx ret = BB_END (e->src);
39187 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39189 int insn_count = ix86_count_insn (e->src);
39191 /* Pad short function. */
39192 if (insn_count < 4)
39194 rtx insn = ret;
39196 /* Find epilogue. */
39197 while (insn
39198 && (!NOTE_P (insn)
39199 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39200 insn = PREV_INSN (insn);
39202 if (!insn)
39203 insn = ret;
39205 /* Two NOPs count as one instruction. */
39206 insn_count = 2 * (4 - insn_count);
39207 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39213 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39214 the epilogue, the Windows system unwinder will apply epilogue logic and
39215 produce incorrect offsets. This can be avoided by adding a nop between
39216 the last insn that can throw and the first insn of the epilogue. */
39218 static void
39219 ix86_seh_fixup_eh_fallthru (void)
39221 edge e;
39222 edge_iterator ei;
39224 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39226 rtx insn, next;
39228 /* Find the beginning of the epilogue. */
39229 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39230 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39231 break;
39232 if (insn == NULL)
39233 continue;
39235 /* We only care about preceding insns that can throw. */
39236 insn = prev_active_insn (insn);
39237 if (insn == NULL || !can_throw_internal (insn))
39238 continue;
39240 /* Do not separate calls from their debug information. */
39241 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39242 if (NOTE_P (next)
39243 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39244 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39245 insn = next;
39246 else
39247 break;
39249 emit_insn_after (gen_nops (const1_rtx), insn);
39253 /* Implement machine specific optimizations. We implement padding of returns
39254 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39255 static void
39256 ix86_reorg (void)
39258 /* We are freeing block_for_insn in the toplev to keep compatibility
39259 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39260 compute_bb_for_insn ();
39262 if (TARGET_SEH && current_function_has_exception_handlers ())
39263 ix86_seh_fixup_eh_fallthru ();
39265 if (optimize && optimize_function_for_speed_p (cfun))
39267 if (TARGET_PAD_SHORT_FUNCTION)
39268 ix86_pad_short_function ();
39269 else if (TARGET_PAD_RETURNS)
39270 ix86_pad_returns ();
39271 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39272 if (TARGET_FOUR_JUMP_LIMIT)
39273 ix86_avoid_jump_mispredicts ();
39274 #endif
39278 /* Return true when a QImode register that must be represented via a REX
39279 prefix is used. */
39280 bool
39281 x86_extended_QIreg_mentioned_p (rtx insn)
39283 int i;
39284 extract_insn_cached (insn);
39285 for (i = 0; i < recog_data.n_operands; i++)
39286 if (GENERAL_REG_P (recog_data.operand[i])
39287 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39288 return true;
39289 return false;
39292 /* Return nonzero when P points to a register encoded via a REX prefix.
39293 Called via for_each_rtx. */
39294 static int
39295 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39297 unsigned int regno;
39298 if (!REG_P (*p))
39299 return 0;
39300 regno = REGNO (*p);
39301 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39304 /* Return true when INSN mentions register that must be encoded using REX
39305 prefix. */
39306 bool
39307 x86_extended_reg_mentioned_p (rtx insn)
39309 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39310 extended_reg_mentioned_1, NULL);
39313 /* If profitable, negate (without causing overflow) the integer constant
39314 of mode MODE at location LOC. Return true in this case. */
39315 bool
39316 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39318 HOST_WIDE_INT val;
39320 if (!CONST_INT_P (*loc))
39321 return false;
39323 switch (mode)
39325 case DImode:
39326 /* DImode x86_64 constants must fit in 32 bits. */
39327 gcc_assert (x86_64_immediate_operand (*loc, mode));
39329 mode = SImode;
39330 break;
39332 case SImode:
39333 case HImode:
39334 case QImode:
39335 break;
39337 default:
39338 gcc_unreachable ();
39341 /* Avoid overflows. */
39342 if (mode_signbit_p (mode, *loc))
39343 return false;
39345 val = INTVAL (*loc);
39347 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39348 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
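/* For example, `addl $128,%eax' is rewritten as `subl $-128,%eax' so the
immediate fits in a sign-extended byte, while an existing -128 is left
alone because +128 would not fit. */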
39349 if ((val < 0 && val != -128)
39350 || val == 128)
39352 *loc = GEN_INT (-val);
39353 return true;
39356 return false;
39359 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39360 optabs would emit if we didn't have TFmode patterns. */
39362 void
39363 x86_emit_floatuns (rtx operands[2])
39365 rtx neglab, donelab, i0, i1, f0, in, out;
39366 enum machine_mode mode, inmode;
39368 inmode = GET_MODE (operands[1]);
39369 gcc_assert (inmode == SImode || inmode == DImode);
39371 out = operands[0];
39372 in = force_reg (inmode, operands[1]);
39373 mode = GET_MODE (out);
39374 neglab = gen_label_rtx ();
39375 donelab = gen_label_rtx ();
39376 f0 = gen_reg_rtx (mode);
39378 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39380 expand_float (out, in, 0);
39382 emit_jump_insn (gen_jump (donelab));
39383 emit_barrier ();
39385 emit_label (neglab);
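/* The input has its top bit set, so a signed conversion would be wrong.
Halve it while folding the low bit back in (so rounding is unaffected),
convert, and then double the result. */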
39387 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39388 1, OPTAB_DIRECT);
39389 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39390 1, OPTAB_DIRECT);
39391 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39393 expand_float (f0, i0, 0);
39395 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39397 emit_label (donelab);
39400 /* AVX512F does support 64-byte integer vector operations,
39401 thus the longest vector we are faced with is V64QImode. */
39402 #define MAX_VECT_LEN 64
39404 struct expand_vec_perm_d
39406 rtx target, op0, op1;
39407 unsigned char perm[MAX_VECT_LEN];
39408 enum machine_mode vmode;
39409 unsigned char nelt;
39410 bool one_operand_p;
39411 bool testing_p;
39414 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39415 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39416 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39418 /* Get a vector mode of the same size as the original but with elements
39419 twice as wide. This is only guaranteed to apply to integral vectors. */
39421 static inline enum machine_mode
39422 get_mode_wider_vector (enum machine_mode o)
39424 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39425 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39426 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39427 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39428 return n;
39431 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39432 fill target with val via vec_duplicate. */
39434 static bool
39435 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39437 bool ok;
39438 rtx insn, dup;
39440 /* First attempt to recognize VAL as-is. */
39441 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39442 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39443 if (recog_memoized (insn) < 0)
39445 rtx seq;
39446 /* If that fails, force VAL into a register. */
39448 start_sequence ();
39449 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39450 seq = get_insns ();
39451 end_sequence ();
39452 if (seq)
39453 emit_insn_before (seq, insn);
39455 ok = recog_memoized (insn) >= 0;
39456 gcc_assert (ok);
39458 return true;
39461 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39462 with all elements equal to VAR. Return true if successful. */
39464 static bool
39465 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39466 rtx target, rtx val)
39468 bool ok;
39470 switch (mode)
39472 case V2SImode:
39473 case V2SFmode:
39474 if (!mmx_ok)
39475 return false;
39476 /* FALLTHRU */
39478 case V4DFmode:
39479 case V4DImode:
39480 case V8SFmode:
39481 case V8SImode:
39482 case V2DFmode:
39483 case V2DImode:
39484 case V4SFmode:
39485 case V4SImode:
39486 case V16SImode:
39487 case V8DImode:
39488 case V16SFmode:
39489 case V8DFmode:
39490 return ix86_vector_duplicate_value (mode, target, val);
39492 case V4HImode:
39493 if (!mmx_ok)
39494 return false;
39495 if (TARGET_SSE || TARGET_3DNOW_A)
39497 rtx x;
39499 val = gen_lowpart (SImode, val);
39500 x = gen_rtx_TRUNCATE (HImode, val);
39501 x = gen_rtx_VEC_DUPLICATE (mode, x);
39502 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39503 return true;
39505 goto widen;
39507 case V8QImode:
39508 if (!mmx_ok)
39509 return false;
39510 goto widen;
39512 case V8HImode:
39513 if (TARGET_SSE2)
39515 struct expand_vec_perm_d dperm;
39516 rtx tmp1, tmp2;
39518 permute:
39519 memset (&dperm, 0, sizeof (dperm));
39520 dperm.target = target;
39521 dperm.vmode = mode;
39522 dperm.nelt = GET_MODE_NUNITS (mode);
39523 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39524 dperm.one_operand_p = true;
39526 /* Extend to SImode using a paradoxical SUBREG. */
39527 tmp1 = gen_reg_rtx (SImode);
39528 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39530 /* Insert the SImode value as low element of a V4SImode vector. */
39531 tmp2 = gen_reg_rtx (V4SImode);
39532 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39533 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39535 ok = (expand_vec_perm_1 (&dperm)
39536 || expand_vec_perm_broadcast_1 (&dperm));
39537 gcc_assert (ok);
39538 return ok;
39540 goto widen;
39542 case V16QImode:
39543 if (TARGET_SSE2)
39544 goto permute;
39545 goto widen;
39547 widen:
39548 /* Replicate the value once into the next wider mode and recurse. */
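/* For V8QImode, for instance, the value is widened to HImode as
(val << 8) | val and we recurse with V4HImode. */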
39550 enum machine_mode smode, wsmode, wvmode;
39551 rtx x;
39553 smode = GET_MODE_INNER (mode);
39554 wvmode = get_mode_wider_vector (mode);
39555 wsmode = GET_MODE_INNER (wvmode);
39557 val = convert_modes (wsmode, smode, val, true);
39558 x = expand_simple_binop (wsmode, ASHIFT, val,
39559 GEN_INT (GET_MODE_BITSIZE (smode)),
39560 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39561 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39563 x = gen_reg_rtx (wvmode);
39564 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39565 gcc_assert (ok);
39566 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39567 return ok;
39570 case V16HImode:
39571 case V32QImode:
39573 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39574 rtx x = gen_reg_rtx (hvmode);
39576 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39577 gcc_assert (ok);
39579 x = gen_rtx_VEC_CONCAT (mode, x, x);
39580 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39582 return true;
39584 default:
39585 return false;
39589 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39590 whose ONE_VAR element is VAR, and other elements are zero. Return true
39591 if successful. */
39593 static bool
39594 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39595 rtx target, rtx var, int one_var)
39597 enum machine_mode vsimode;
39598 rtx new_target;
39599 rtx x, tmp;
39600 bool use_vector_set = false;
39602 switch (mode)
39604 case V2DImode:
39605 /* For SSE4.1, we normally use vector set. But if the second
39606 element is zero and inter-unit moves are OK, we use movq
39607 instead. */
39608 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39609 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39610 && one_var == 0));
39611 break;
39612 case V16QImode:
39613 case V4SImode:
39614 case V4SFmode:
39615 use_vector_set = TARGET_SSE4_1;
39616 break;
39617 case V8HImode:
39618 use_vector_set = TARGET_SSE2;
39619 break;
39620 case V4HImode:
39621 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39622 break;
39623 case V32QImode:
39624 case V16HImode:
39625 case V8SImode:
39626 case V8SFmode:
39627 case V4DFmode:
39628 use_vector_set = TARGET_AVX;
39629 break;
39630 case V4DImode:
39631 /* Use ix86_expand_vector_set in 64bit mode only. */
39632 use_vector_set = TARGET_AVX && TARGET_64BIT;
39633 break;
39634 default:
39635 break;
39638 if (use_vector_set)
39640 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39641 var = force_reg (GET_MODE_INNER (mode), var);
39642 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39643 return true;
39646 switch (mode)
39648 case V2SFmode:
39649 case V2SImode:
39650 if (!mmx_ok)
39651 return false;
39652 /* FALLTHRU */
39654 case V2DFmode:
39655 case V2DImode:
39656 if (one_var != 0)
39657 return false;
39658 var = force_reg (GET_MODE_INNER (mode), var);
39659 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39660 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39661 return true;
39663 case V4SFmode:
39664 case V4SImode:
39665 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39666 new_target = gen_reg_rtx (mode);
39667 else
39668 new_target = target;
39669 var = force_reg (GET_MODE_INNER (mode), var);
39670 x = gen_rtx_VEC_DUPLICATE (mode, var);
39671 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39672 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39673 if (one_var != 0)
39675 /* We need to shuffle the value to the correct position, so
39676 create a new pseudo to store the intermediate result. */
39678 /* With SSE2, we can use the integer shuffle insns. */
39679 if (mode != V4SFmode && TARGET_SSE2)
39681 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39682 const1_rtx,
39683 GEN_INT (one_var == 1 ? 0 : 1),
39684 GEN_INT (one_var == 2 ? 0 : 1),
39685 GEN_INT (one_var == 3 ? 0 : 1)));
39686 if (target != new_target)
39687 emit_move_insn (target, new_target);
39688 return true;
39691 /* Otherwise convert the intermediate result to V4SFmode and
39692 use the SSE1 shuffle instructions. */
39693 if (mode != V4SFmode)
39695 tmp = gen_reg_rtx (V4SFmode);
39696 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39698 else
39699 tmp = new_target;
39701 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39702 const1_rtx,
39703 GEN_INT (one_var == 1 ? 0 : 1),
39704 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39705 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39707 if (mode != V4SFmode)
39708 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39709 else if (tmp != target)
39710 emit_move_insn (target, tmp);
39712 else if (target != new_target)
39713 emit_move_insn (target, new_target);
39714 return true;
39716 case V8HImode:
39717 case V16QImode:
39718 vsimode = V4SImode;
39719 goto widen;
39720 case V4HImode:
39721 case V8QImode:
39722 if (!mmx_ok)
39723 return false;
39724 vsimode = V2SImode;
39725 goto widen;
39726 widen:
39727 if (one_var != 0)
39728 return false;
39730 /* Zero extend the variable element to SImode and recurse. */
39731 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39733 x = gen_reg_rtx (vsimode);
39734 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39735 var, one_var))
39736 gcc_unreachable ();
39738 emit_move_insn (target, gen_lowpart (mode, x));
39739 return true;
39741 default:
39742 return false;
39746 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39747 consisting of the values in VALS. It is known that all elements
39748 except ONE_VAR are constants. Return true if successful. */
39750 static bool
39751 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39752 rtx target, rtx vals, int one_var)
39754 rtx var = XVECEXP (vals, 0, one_var);
39755 enum machine_mode wmode;
39756 rtx const_vec, x;
39758 const_vec = copy_rtx (vals);
39759 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39760 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39762 switch (mode)
39764 case V2DFmode:
39765 case V2DImode:
39766 case V2SFmode:
39767 case V2SImode:
39768 /* For the two element vectors, it's just as easy to use
39769 the general case. */
39770 return false;
39772 case V4DImode:
39773 /* Use ix86_expand_vector_set in 64bit mode only. */
39774 if (!TARGET_64BIT)
39775 return false;
39776 case V4DFmode:
39777 case V8SFmode:
39778 case V8SImode:
39779 case V16HImode:
39780 case V32QImode:
39781 case V4SFmode:
39782 case V4SImode:
39783 case V8HImode:
39784 case V4HImode:
39785 break;
39787 case V16QImode:
39788 if (TARGET_SSE4_1)
39789 break;
39790 wmode = V8HImode;
39791 goto widen;
39792 case V8QImode:
39793 wmode = V4HImode;
39794 goto widen;
39795 widen:
39796 /* There's no way to set one QImode entry easily. Combine
39797 the variable value with its adjacent constant value, and
39798 promote to an HImode set. */
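/* E.g. if element 5 of a V16QImode vector is the variable one, it is
combined with constant element 4 into a single HImode value, and
element 2 of the V8HImode view is set instead. */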
39799 x = XVECEXP (vals, 0, one_var ^ 1);
39800 if (one_var & 1)
39802 var = convert_modes (HImode, QImode, var, true);
39803 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39804 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39805 x = GEN_INT (INTVAL (x) & 0xff);
39807 else
39809 var = convert_modes (HImode, QImode, var, true);
39810 x = gen_int_mode (INTVAL (x) << 8, HImode);
39812 if (x != const0_rtx)
39813 var = expand_simple_binop (HImode, IOR, var, x, var,
39814 1, OPTAB_LIB_WIDEN);
39816 x = gen_reg_rtx (wmode);
39817 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39818 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39820 emit_move_insn (target, gen_lowpart (mode, x));
39821 return true;
39823 default:
39824 return false;
39827 emit_move_insn (target, const_vec);
39828 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39829 return true;
39832 /* A subroutine of ix86_expand_vector_init_general. Use vector
39833 concatenate to handle the most general case: all values variable,
39834 and none identical. */
39836 static void
39837 ix86_expand_vector_init_concat (enum machine_mode mode,
39838 rtx target, rtx *ops, int n)
39840 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39841 rtx first[16], second[8], third[4];
39842 rtvec v;
39843 int i, j;
39845 switch (n)
39847 case 2:
39848 switch (mode)
39850 case V16SImode:
39851 cmode = V8SImode;
39852 break;
39853 case V16SFmode:
39854 cmode = V8SFmode;
39855 break;
39856 case V8DImode:
39857 cmode = V4DImode;
39858 break;
39859 case V8DFmode:
39860 cmode = V4DFmode;
39861 break;
39862 case V8SImode:
39863 cmode = V4SImode;
39864 break;
39865 case V8SFmode:
39866 cmode = V4SFmode;
39867 break;
39868 case V4DImode:
39869 cmode = V2DImode;
39870 break;
39871 case V4DFmode:
39872 cmode = V2DFmode;
39873 break;
39874 case V4SImode:
39875 cmode = V2SImode;
39876 break;
39877 case V4SFmode:
39878 cmode = V2SFmode;
39879 break;
39880 case V2DImode:
39881 cmode = DImode;
39882 break;
39883 case V2SImode:
39884 cmode = SImode;
39885 break;
39886 case V2DFmode:
39887 cmode = DFmode;
39888 break;
39889 case V2SFmode:
39890 cmode = SFmode;
39891 break;
39892 default:
39893 gcc_unreachable ();
39896 if (!register_operand (ops[1], cmode))
39897 ops[1] = force_reg (cmode, ops[1]);
39898 if (!register_operand (ops[0], cmode))
39899 ops[0] = force_reg (cmode, ops[0]);
39900 emit_insn (gen_rtx_SET (VOIDmode, target,
39901 gen_rtx_VEC_CONCAT (mode, ops[0],
39902 ops[1])));
39903 break;
39905 case 4:
39906 switch (mode)
39908 case V4DImode:
39909 cmode = V2DImode;
39910 break;
39911 case V4DFmode:
39912 cmode = V2DFmode;
39913 break;
39914 case V4SImode:
39915 cmode = V2SImode;
39916 break;
39917 case V4SFmode:
39918 cmode = V2SFmode;
39919 break;
39920 default:
39921 gcc_unreachable ();
39923 goto half;
39925 case 8:
39926 switch (mode)
39928 case V8DImode:
39929 cmode = V2DImode;
39930 hmode = V4DImode;
39931 break;
39932 case V8DFmode:
39933 cmode = V2DFmode;
39934 hmode = V4DFmode;
39935 break;
39936 case V8SImode:
39937 cmode = V2SImode;
39938 hmode = V4SImode;
39939 break;
39940 case V8SFmode:
39941 cmode = V2SFmode;
39942 hmode = V4SFmode;
39943 break;
39944 default:
39945 gcc_unreachable ();
39947 goto half;
39949 case 16:
39950 switch (mode)
39952 case V16SImode:
39953 cmode = V2SImode;
39954 hmode = V4SImode;
39955 gmode = V8SImode;
39956 break;
39957 case V16SFmode:
39958 cmode = V2SFmode;
39959 hmode = V4SFmode;
39960 gmode = V8SFmode;
39961 break;
39962 default:
39963 gcc_unreachable ();
39965 goto half;
39967 half:
39968 /* FIXME: We process inputs backward to help RA. PR 36222. */
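/* Concatenate adjacent input pairs into half-width vectors, then keep
pairing those up until one full-width vector remains. */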
39969 i = n - 1;
39970 j = (n >> 1) - 1;
39971 for (; i > 0; i -= 2, j--)
39973 first[j] = gen_reg_rtx (cmode);
39974 v = gen_rtvec (2, ops[i - 1], ops[i]);
39975 ix86_expand_vector_init (false, first[j],
39976 gen_rtx_PARALLEL (cmode, v));
39979 n >>= 1;
39980 if (n > 4)
39982 gcc_assert (hmode != VOIDmode);
39983 gcc_assert (gmode != VOIDmode);
39984 for (i = j = 0; i < n; i += 2, j++)
39986 second[j] = gen_reg_rtx (hmode);
39987 ix86_expand_vector_init_concat (hmode, second [j],
39988 &first [i], 2);
39990 n >>= 1;
39991 for (i = j = 0; i < n; i += 2, j++)
39993 third[j] = gen_reg_rtx (gmode);
39994 ix86_expand_vector_init_concat (gmode, third[j],
39995 &second[i], 2);
39997 n >>= 1;
39998 ix86_expand_vector_init_concat (mode, target, third, n);
40000 else if (n > 2)
40002 gcc_assert (hmode != VOIDmode);
40003 for (i = j = 0; i < n; i += 2, j++)
40005 second[j] = gen_reg_rtx (hmode);
40006 ix86_expand_vector_init_concat (hmode, second [j],
40007 &first [i], 2);
40009 n >>= 1;
40010 ix86_expand_vector_init_concat (mode, target, second, n);
40012 else
40013 ix86_expand_vector_init_concat (mode, target, first, n);
40014 break;
40016 default:
40017 gcc_unreachable ();
40021 /* A subroutine of ix86_expand_vector_init_general. Use vector
40022 interleave to handle the most general case: all values variable,
40023 and none identical. */
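/* Each pair of scalar inputs is packed into one vector via gen_load_even,
and successive low-interleave (punpckl*) steps on progressively wider
element types merge the results into the final vector. */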
40025 static void
40026 ix86_expand_vector_init_interleave (enum machine_mode mode,
40027 rtx target, rtx *ops, int n)
40029 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40030 int i, j;
40031 rtx op0, op1;
40032 rtx (*gen_load_even) (rtx, rtx, rtx);
40033 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40034 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40036 switch (mode)
40038 case V8HImode:
40039 gen_load_even = gen_vec_setv8hi;
40040 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40041 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40042 inner_mode = HImode;
40043 first_imode = V4SImode;
40044 second_imode = V2DImode;
40045 third_imode = VOIDmode;
40046 break;
40047 case V16QImode:
40048 gen_load_even = gen_vec_setv16qi;
40049 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40050 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40051 inner_mode = QImode;
40052 first_imode = V8HImode;
40053 second_imode = V4SImode;
40054 third_imode = V2DImode;
40055 break;
40056 default:
40057 gcc_unreachable ();
40060 for (i = 0; i < n; i++)
40062 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40063 op0 = gen_reg_rtx (SImode);
40064 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40066 /* Insert the SImode value as low element of V4SImode vector. */
40067 op1 = gen_reg_rtx (V4SImode);
40068 op0 = gen_rtx_VEC_MERGE (V4SImode,
40069 gen_rtx_VEC_DUPLICATE (V4SImode,
40070 op0),
40071 CONST0_RTX (V4SImode),
40072 const1_rtx);
40073 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40075 /* Cast the V4SImode vector back to a vector in original mode. */
40076 op0 = gen_reg_rtx (mode);
40077 emit_move_insn (op0, gen_lowpart (mode, op1));
40079 /* Load even elements into the second position. */
40080 emit_insn (gen_load_even (op0,
40081 force_reg (inner_mode,
40082 ops [i + i + 1]),
40083 const1_rtx));
40085 /* Cast vector to FIRST_IMODE vector. */
40086 ops[i] = gen_reg_rtx (first_imode);
40087 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40090 /* Interleave low FIRST_IMODE vectors. */
40091 for (i = j = 0; i < n; i += 2, j++)
40093 op0 = gen_reg_rtx (first_imode);
40094 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40096 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40097 ops[j] = gen_reg_rtx (second_imode);
40098 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40101 /* Interleave low SECOND_IMODE vectors. */
40102 switch (second_imode)
40104 case V4SImode:
40105 for (i = j = 0; i < n / 2; i += 2, j++)
40107 op0 = gen_reg_rtx (second_imode);
40108 emit_insn (gen_interleave_second_low (op0, ops[i],
40109 ops[i + 1]));
40111 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40112 vector. */
40113 ops[j] = gen_reg_rtx (third_imode);
40114 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40116 second_imode = V2DImode;
40117 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40118 /* FALLTHRU */
40120 case V2DImode:
40121 op0 = gen_reg_rtx (second_imode);
40122 emit_insn (gen_interleave_second_low (op0, ops[0],
40123 ops[1]));
40125 /* Cast the SECOND_IMODE vector back to a vector in original
40126 mode. */
40127 emit_insn (gen_rtx_SET (VOIDmode, target,
40128 gen_lowpart (mode, op0)));
40129 break;
40131 default:
40132 gcc_unreachable ();
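/* Commentary (not in the original source): for V8HImode with elements
   h0..h7, each iteration of the first loop packs one (even, odd) pair
   into the low 32 bits of a fresh register, so afterwards ops[0..3] are
   V4SImode values whose lane 0 holds h0h1, h2h3, h4h5 and h6h7.  The low
   V4SImode interleave then yields two V2DImode values whose low halves
   hold h0..h3 and h4..h7, and the final low V2DImode interleave
   concatenates those into the full vector.  V16QImode follows the same
   pattern with one extra interleave level (V8HI -> V4SI -> V2DI).  */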
40136 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40137 all values variable, and none identical. */
40139 static void
40140 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40141 rtx target, rtx vals)
40143 rtx ops[64], op0, op1;
40144 enum machine_mode half_mode = VOIDmode;
40145 int n, i;
40147 switch (mode)
40149 case V2SFmode:
40150 case V2SImode:
40151 if (!mmx_ok && !TARGET_SSE)
40152 break;
40153 /* FALLTHRU */
40155 case V16SImode:
40156 case V16SFmode:
40157 case V8DFmode:
40158 case V8DImode:
40159 case V8SFmode:
40160 case V8SImode:
40161 case V4DFmode:
40162 case V4DImode:
40163 case V4SFmode:
40164 case V4SImode:
40165 case V2DFmode:
40166 case V2DImode:
40167 n = GET_MODE_NUNITS (mode);
40168 for (i = 0; i < n; i++)
40169 ops[i] = XVECEXP (vals, 0, i);
40170 ix86_expand_vector_init_concat (mode, target, ops, n);
40171 return;
40173 case V32QImode:
40174 half_mode = V16QImode;
40175 goto half;
40177 case V16HImode:
40178 half_mode = V8HImode;
40179 goto half;
40181 half:
40182 n = GET_MODE_NUNITS (mode);
40183 for (i = 0; i < n; i++)
40184 ops[i] = XVECEXP (vals, 0, i);
40185 op0 = gen_reg_rtx (half_mode);
40186 op1 = gen_reg_rtx (half_mode);
40187 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40188 n >> 2);
40189 ix86_expand_vector_init_interleave (half_mode, op1,
40190 &ops [n >> 1], n >> 2);
40191 emit_insn (gen_rtx_SET (VOIDmode, target,
40192 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40193 return;
40195 case V16QImode:
40196 if (!TARGET_SSE4_1)
40197 break;
40198 /* FALLTHRU */
40200 case V8HImode:
40201 if (!TARGET_SSE2)
40202 break;
40204 /* Don't use ix86_expand_vector_init_interleave if we can't
40205 move from GPR to SSE register directly. */
40206 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40207 break;
40209 n = GET_MODE_NUNITS (mode);
40210 for (i = 0; i < n; i++)
40211 ops[i] = XVECEXP (vals, 0, i);
40212 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40213 return;
40215 case V4HImode:
40216 case V8QImode:
40217 break;
40219 default:
40220 gcc_unreachable ();
40224 int i, j, n_elts, n_words, n_elt_per_word;
40225 enum machine_mode inner_mode;
40226 rtx words[4], shift;
40228 inner_mode = GET_MODE_INNER (mode);
40229 n_elts = GET_MODE_NUNITS (mode);
40230 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40231 n_elt_per_word = n_elts / n_words;
40232 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40234 for (i = 0; i < n_words; ++i)
40236 rtx word = NULL_RTX;
40238 for (j = 0; j < n_elt_per_word; ++j)
40240 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40241 elt = convert_modes (word_mode, inner_mode, elt, true);
40243 if (j == 0)
40244 word = elt;
40245 else
40247 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40248 word, 1, OPTAB_LIB_WIDEN);
40249 word = expand_simple_binop (word_mode, IOR, word, elt,
40250 word, 1, OPTAB_LIB_WIDEN);
40254 words[i] = word;
40257 if (n_words == 1)
40258 emit_move_insn (target, gen_lowpart (mode, words[0]));
40259 else if (n_words == 2)
40261 rtx tmp = gen_reg_rtx (mode);
40262 emit_clobber (tmp);
40263 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40264 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40265 emit_move_insn (target, tmp);
40267 else if (n_words == 4)
40269 rtx tmp = gen_reg_rtx (V4SImode);
40270 gcc_assert (word_mode == SImode);
40271 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40272 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40273 emit_move_insn (target, gen_lowpart (mode, tmp));
40275 else
40276 gcc_unreachable ();
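/* Commentary (not in the original source): the word-building fallback at
   the end, for a V4HImode constructor { h0, h1, h2, h3 } on a 32-bit
   target, is in effect

     words[0] = (h1 << 16) | h0;
     words[1] = (h3 << 16) | h2;

   i.e. each word is accumulated from its highest-numbered element down,
   shifting left by the element width before OR-ing the next one in, and
   the one, two or four resulting word_mode values are then assembled into
   the vector register.  */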
40280 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40281 instructions unless MMX_OK is true. */
40283 void
40284 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40286 enum machine_mode mode = GET_MODE (target);
40287 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40288 int n_elts = GET_MODE_NUNITS (mode);
40289 int n_var = 0, one_var = -1;
40290 bool all_same = true, all_const_zero = true;
40291 int i;
40292 rtx x;
40294 for (i = 0; i < n_elts; ++i)
40296 x = XVECEXP (vals, 0, i);
40297 if (!(CONST_INT_P (x)
40298 || GET_CODE (x) == CONST_DOUBLE
40299 || GET_CODE (x) == CONST_FIXED))
40300 n_var++, one_var = i;
40301 else if (x != CONST0_RTX (inner_mode))
40302 all_const_zero = false;
40303 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40304 all_same = false;
40307 /* Constants are best loaded from the constant pool. */
40308 if (n_var == 0)
40310 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40311 return;
40314 /* If all values are identical, broadcast the value. */
40315 if (all_same
40316 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40317 XVECEXP (vals, 0, 0)))
40318 return;
40320 /* Values where only one field is non-constant are best loaded from
40321 the pool and overwritten via move later. */
40322 if (n_var == 1)
40324 if (all_const_zero
40325 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40326 XVECEXP (vals, 0, one_var),
40327 one_var))
40328 return;
40330 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40331 return;
40334 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40337 void
40338 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40340 enum machine_mode mode = GET_MODE (target);
40341 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40342 enum machine_mode half_mode;
40343 bool use_vec_merge = false;
40344 rtx tmp;
40345 static rtx (*gen_extract[6][2]) (rtx, rtx)
40347 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40348 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40349 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40350 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40351 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40352 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40354 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40356 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40357 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40358 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40359 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40360 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40361 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40363 int i, j, n;
40365 switch (mode)
40367 case V2SFmode:
40368 case V2SImode:
40369 if (mmx_ok)
40371 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40372 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40373 if (elt == 0)
40374 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40375 else
40376 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40377 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40378 return;
40380 break;
40382 case V2DImode:
40383 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40384 if (use_vec_merge)
40385 break;
40387 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40388 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40389 if (elt == 0)
40390 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40391 else
40392 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40393 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40394 return;
40396 case V2DFmode:
40398 rtx op0, op1;
40400 /* For the two element vectors, we implement a VEC_CONCAT with
40401 the extraction of the other element. */
40403 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40404 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40406 if (elt == 0)
40407 op0 = val, op1 = tmp;
40408 else
40409 op0 = tmp, op1 = val;
40411 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40412 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40414 return;
40416 case V4SFmode:
40417 use_vec_merge = TARGET_SSE4_1;
40418 if (use_vec_merge)
40419 break;
40421 switch (elt)
40423 case 0:
40424 use_vec_merge = true;
40425 break;
40427 case 1:
40428 /* tmp = target = A B C D */
40429 tmp = copy_to_reg (target);
40430 /* target = A A B B */
40431 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40432 /* target = X A B B */
40433 ix86_expand_vector_set (false, target, val, 0);
40434 /* target = A X C D */
40435 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40436 const1_rtx, const0_rtx,
40437 GEN_INT (2+4), GEN_INT (3+4)));
40438 return;
40440 case 2:
40441 /* tmp = target = A B C D */
40442 tmp = copy_to_reg (target);
40443 /* tmp = X B C D */
40444 ix86_expand_vector_set (false, tmp, val, 0);
40445 /* target = A B X D */
40446 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40447 const0_rtx, const1_rtx,
40448 GEN_INT (0+4), GEN_INT (3+4)));
40449 return;
40451 case 3:
40452 /* tmp = target = A B C D */
40453 tmp = copy_to_reg (target);
40454 /* tmp = X B C D */
40455 ix86_expand_vector_set (false, tmp, val, 0);
40456 /* target = A B C X */
40457 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40458 const0_rtx, const1_rtx,
40459 GEN_INT (2+4), GEN_INT (0+4)));
40460 return;
40462 default:
40463 gcc_unreachable ();
40465 break;
40467 case V4SImode:
40468 use_vec_merge = TARGET_SSE4_1;
40469 if (use_vec_merge)
40470 break;
40472 /* Element 0 handled by vec_merge below. */
40473 if (elt == 0)
40475 use_vec_merge = true;
40476 break;
40479 if (TARGET_SSE2)
40481 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40482 store into element 0, then shuffle them back. */
40484 rtx order[4];
40486 order[0] = GEN_INT (elt);
40487 order[1] = const1_rtx;
40488 order[2] = const2_rtx;
40489 order[3] = GEN_INT (3);
40490 order[elt] = const0_rtx;
40492 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40493 order[1], order[2], order[3]));
40495 ix86_expand_vector_set (false, target, val, 0);
40497 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40498 order[1], order[2], order[3]));
40500 else
40502 /* For SSE1, we have to reuse the V4SF code. */
40503 rtx t = gen_reg_rtx (V4SFmode);
40504 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40505 emit_move_insn (target, gen_lowpart (mode, t));
40507 return;
40509 case V8HImode:
40510 use_vec_merge = TARGET_SSE2;
40511 break;
40512 case V4HImode:
40513 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40514 break;
40516 case V16QImode:
40517 use_vec_merge = TARGET_SSE4_1;
40518 break;
40520 case V8QImode:
40521 break;
40523 case V32QImode:
40524 half_mode = V16QImode;
40525 j = 0;
40526 n = 16;
40527 goto half;
40529 case V16HImode:
40530 half_mode = V8HImode;
40531 j = 1;
40532 n = 8;
40533 goto half;
40535 case V8SImode:
40536 half_mode = V4SImode;
40537 j = 2;
40538 n = 4;
40539 goto half;
40541 case V4DImode:
40542 half_mode = V2DImode;
40543 j = 3;
40544 n = 2;
40545 goto half;
40547 case V8SFmode:
40548 half_mode = V4SFmode;
40549 j = 4;
40550 n = 4;
40551 goto half;
40553 case V4DFmode:
40554 half_mode = V2DFmode;
40555 j = 5;
40556 n = 2;
40557 goto half;
40559 half:
40560 /* Compute offset. */
40561 i = elt / n;
40562 elt %= n;
40564 gcc_assert (i <= 1);
40566 /* Extract the half. */
40567 tmp = gen_reg_rtx (half_mode);
40568 emit_insn (gen_extract[j][i] (tmp, target));
40570 /* Put val in tmp at elt. */
40571 ix86_expand_vector_set (false, tmp, val, elt);
40573 /* Put it back. */
40574 emit_insn (gen_insert[j][i] (target, target, tmp));
40575 return;
40577 default:
40578 break;
40581 if (use_vec_merge)
40583 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40584 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40585 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40587 else
40589 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40591 emit_move_insn (mem, target);
40593 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40594 emit_move_insn (tmp, val);
40596 emit_move_insn (target, mem);
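/* Commentary (not in the original source): when no insertion pattern
   matched above, the final fallback is just the memory round trip

     mem = target;        spill the whole vector to a stack slot
     mem[elt] = val;      overwrite the one element in memory
     target = mem;        reload the vector

   which is correct for any element and mode, at the price of going
   through memory.  */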
40600 void
40601 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40603 enum machine_mode mode = GET_MODE (vec);
40604 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40605 bool use_vec_extr = false;
40606 rtx tmp;
40608 switch (mode)
40610 case V2SImode:
40611 case V2SFmode:
40612 if (!mmx_ok)
40613 break;
40614 /* FALLTHRU */
40616 case V2DFmode:
40617 case V2DImode:
40618 use_vec_extr = true;
40619 break;
40621 case V4SFmode:
40622 use_vec_extr = TARGET_SSE4_1;
40623 if (use_vec_extr)
40624 break;
40626 switch (elt)
40628 case 0:
40629 tmp = vec;
40630 break;
40632 case 1:
40633 case 3:
40634 tmp = gen_reg_rtx (mode);
40635 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40636 GEN_INT (elt), GEN_INT (elt),
40637 GEN_INT (elt+4), GEN_INT (elt+4)));
40638 break;
40640 case 2:
40641 tmp = gen_reg_rtx (mode);
40642 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40643 break;
40645 default:
40646 gcc_unreachable ();
40648 vec = tmp;
40649 use_vec_extr = true;
40650 elt = 0;
40651 break;
40653 case V4SImode:
40654 use_vec_extr = TARGET_SSE4_1;
40655 if (use_vec_extr)
40656 break;
40658 if (TARGET_SSE2)
40660 switch (elt)
40662 case 0:
40663 tmp = vec;
40664 break;
40666 case 1:
40667 case 3:
40668 tmp = gen_reg_rtx (mode);
40669 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40670 GEN_INT (elt), GEN_INT (elt),
40671 GEN_INT (elt), GEN_INT (elt)));
40672 break;
40674 case 2:
40675 tmp = gen_reg_rtx (mode);
40676 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40677 break;
40679 default:
40680 gcc_unreachable ();
40682 vec = tmp;
40683 use_vec_extr = true;
40684 elt = 0;
40686 else
40688 /* For SSE1, we have to reuse the V4SF code. */
40689 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40690 gen_lowpart (V4SFmode, vec), elt);
40691 return;
40693 break;
40695 case V8HImode:
40696 use_vec_extr = TARGET_SSE2;
40697 break;
40698 case V4HImode:
40699 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40700 break;
40702 case V16QImode:
40703 use_vec_extr = TARGET_SSE4_1;
40704 break;
40706 case V8SFmode:
40707 if (TARGET_AVX)
40709 tmp = gen_reg_rtx (V4SFmode);
40710 if (elt < 4)
40711 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40712 else
40713 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40714 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40715 return;
40717 break;
40719 case V4DFmode:
40720 if (TARGET_AVX)
40722 tmp = gen_reg_rtx (V2DFmode);
40723 if (elt < 2)
40724 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40725 else
40726 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40727 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40728 return;
40730 break;
40732 case V32QImode:
40733 if (TARGET_AVX)
40735 tmp = gen_reg_rtx (V16QImode);
40736 if (elt < 16)
40737 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40738 else
40739 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40740 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40741 return;
40743 break;
40745 case V16HImode:
40746 if (TARGET_AVX)
40748 tmp = gen_reg_rtx (V8HImode);
40749 if (elt < 8)
40750 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40751 else
40752 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40753 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40754 return;
40756 break;
40758 case V8SImode:
40759 if (TARGET_AVX)
40761 tmp = gen_reg_rtx (V4SImode);
40762 if (elt < 4)
40763 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40764 else
40765 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40766 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40767 return;
40769 break;
40771 case V4DImode:
40772 if (TARGET_AVX)
40774 tmp = gen_reg_rtx (V2DImode);
40775 if (elt < 2)
40776 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40777 else
40778 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40779 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40780 return;
40782 break;
40784 case V16SFmode:
40785 tmp = gen_reg_rtx (V8SFmode);
40786 if (elt < 8)
40787 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40788 else
40789 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40790 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40791 return;
40793 case V8DFmode:
40794 tmp = gen_reg_rtx (V4DFmode);
40795 if (elt < 4)
40796 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40797 else
40798 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40799 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40800 return;
40802 case V16SImode:
40803 tmp = gen_reg_rtx (V8SImode);
40804 if (elt < 8)
40805 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40806 else
40807 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40808 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40809 return;
40811 case V8DImode:
40812 tmp = gen_reg_rtx (V4DImode);
40813 if (elt < 4)
40814 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40815 else
40816 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40817 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40818 return;
40820 case V8QImode:
40821 /* ??? Could extract the appropriate HImode element and shift. */
40822 default:
40823 break;
40826 if (use_vec_extr)
40828 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40829 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40831 /* Let the rtl optimizers know about the zero extension performed. */
40832 if (inner_mode == QImode || inner_mode == HImode)
40834 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40835 target = gen_lowpart (SImode, target);
40838 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40840 else
40842 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40844 emit_move_insn (mem, vec);
40846 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40847 emit_move_insn (target, tmp);
40851 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40852 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40853 The upper bits of DEST are undefined, though they shouldn't cause
40854 exceptions (some bits from src or all zeros are ok). */
40856 static void
40857 emit_reduc_half (rtx dest, rtx src, int i)
40859 rtx tem, d = dest;
40860 switch (GET_MODE (src))
40862 case V4SFmode:
40863 if (i == 128)
40864 tem = gen_sse_movhlps (dest, src, src);
40865 else
40866 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40867 GEN_INT (1 + 4), GEN_INT (1 + 4));
40868 break;
40869 case V2DFmode:
40870 tem = gen_vec_interleave_highv2df (dest, src, src);
40871 break;
40872 case V16QImode:
40873 case V8HImode:
40874 case V4SImode:
40875 case V2DImode:
40876 d = gen_reg_rtx (V1TImode);
40877 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40878 GEN_INT (i / 2));
40879 break;
40880 case V8SFmode:
40881 if (i == 256)
40882 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40883 else
40884 tem = gen_avx_shufps256 (dest, src, src,
40885 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40886 break;
40887 case V4DFmode:
40888 if (i == 256)
40889 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40890 else
40891 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40892 break;
40893 case V32QImode:
40894 case V16HImode:
40895 case V8SImode:
40896 case V4DImode:
40897 if (i == 256)
40899 if (GET_MODE (dest) != V4DImode)
40900 d = gen_reg_rtx (V4DImode);
40901 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
40902 gen_lowpart (V4DImode, src),
40903 const1_rtx);
40905 else
40907 d = gen_reg_rtx (V2TImode);
40908 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
40909 GEN_INT (i / 2));
40911 break;
40912 case V16SImode:
40913 case V16SFmode:
40914 case V8DImode:
40915 case V8DFmode:
40916 if (i > 128)
40917 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
40918 gen_lowpart (V16SImode, src),
40919 gen_lowpart (V16SImode, src),
40920 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
40921 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
40922 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
40923 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
40924 GEN_INT (0xC), GEN_INT (0xD),
40925 GEN_INT (0xE), GEN_INT (0xF),
40926 GEN_INT (0x10), GEN_INT (0x11),
40927 GEN_INT (0x12), GEN_INT (0x13),
40928 GEN_INT (0x14), GEN_INT (0x15),
40929 GEN_INT (0x16), GEN_INT (0x17));
40930 else
40931 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
40932 gen_lowpart (V16SImode, src),
40933 GEN_INT (i == 128 ? 0x2 : 0x1),
40934 GEN_INT (0x3),
40935 GEN_INT (0x3),
40936 GEN_INT (0x3),
40937 GEN_INT (i == 128 ? 0x6 : 0x5),
40938 GEN_INT (0x7),
40939 GEN_INT (0x7),
40940 GEN_INT (0x7),
40941 GEN_INT (i == 128 ? 0xA : 0x9),
40942 GEN_INT (0xB),
40943 GEN_INT (0xB),
40944 GEN_INT (0xB),
40945 GEN_INT (i == 128 ? 0xE : 0xD),
40946 GEN_INT (0xF),
40947 GEN_INT (0xF),
40948 GEN_INT (0xF));
40949 break;
40950 default:
40951 gcc_unreachable ();
40953 emit_insn (tem);
40954 if (d != dest)
40955 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
40958 /* Expand a vector reduction. FN is the binary pattern to reduce;
40959 DEST is the destination; IN is the input vector. */
40961 void
40962 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
40964 rtx half, dst, vec = in;
40965 enum machine_mode mode = GET_MODE (in);
40966 int i;
40968 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
40969 if (TARGET_SSE4_1
40970 && mode == V8HImode
40971 && fn == gen_uminv8hi3)
40973 emit_insn (gen_sse4_1_phminposuw (dest, in));
40974 return;
40977 for (i = GET_MODE_BITSIZE (mode);
40978 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
40979 i >>= 1)
40981 half = gen_reg_rtx (mode);
40982 emit_reduc_half (half, vec, i);
40983 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
40984 dst = dest;
40985 else
40986 dst = gen_reg_rtx (mode);
40987 emit_insn (fn (dst, half, vec));
40988 vec = dst;
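/* Commentary (not in the original source): the loop above is the usual
   log2(N) halving reduction.  For a max over float v[8] it corresponds to
   the scalar

     for (step = 8; step > 1; step >>= 1)
       for (k = 0; k < step / 2; k++)
         v[k] = v[k] > v[k + step / 2] ? v[k] : v[k + step / 2];

   where emit_reduc_half supplies the "upper half moved down" operand and
   FN combines it with the running vector; only element 0 of DEST holds
   the final scalar result.  */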
40992 /* Target hook for scalar_mode_supported_p. */
40993 static bool
40994 ix86_scalar_mode_supported_p (enum machine_mode mode)
40996 if (DECIMAL_FLOAT_MODE_P (mode))
40997 return default_decimal_float_supported_p ();
40998 else if (mode == TFmode)
40999 return true;
41000 else
41001 return default_scalar_mode_supported_p (mode);
41004 /* Implements target hook vector_mode_supported_p. */
41005 static bool
41006 ix86_vector_mode_supported_p (enum machine_mode mode)
41008 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41009 return true;
41010 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41011 return true;
41012 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41013 return true;
41014 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41015 return true;
41016 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41017 return true;
41018 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41019 return true;
41020 return false;
41023 /* Target hook for c_mode_for_suffix. */
41024 static enum machine_mode
41025 ix86_c_mode_for_suffix (char suffix)
41027 if (suffix == 'q')
41028 return TFmode;
41029 if (suffix == 'w')
41030 return XFmode;
41032 return VOIDmode;
41035 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41037 We do this in the new i386 backend to maintain source compatibility
41038 with the old cc0-based compiler. */
41040 static tree
41041 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41042 tree inputs ATTRIBUTE_UNUSED,
41043 tree clobbers)
41045 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41046 clobbers);
41047 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41048 clobbers);
41049 return clobbers;
41052 /* Implements target vector targetm.asm.encode_section_info. */
41054 static void ATTRIBUTE_UNUSED
41055 ix86_encode_section_info (tree decl, rtx rtl, int first)
41057 default_encode_section_info (decl, rtl, first);
41059 if (TREE_CODE (decl) == VAR_DECL
41060 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41061 && ix86_in_large_data_p (decl))
41062 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41065 /* Worker function for REVERSE_CONDITION. */
41067 enum rtx_code
41068 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41070 return (mode != CCFPmode && mode != CCFPUmode
41071 ? reverse_condition (code)
41072 : reverse_condition_maybe_unordered (code));
41075 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41076 to OPERANDS[0]. */
41078 const char *
41079 output_387_reg_move (rtx insn, rtx *operands)
41081 if (REG_P (operands[0]))
41083 if (REG_P (operands[1])
41084 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41086 if (REGNO (operands[0]) == FIRST_STACK_REG)
41087 return output_387_ffreep (operands, 0);
41088 return "fstp\t%y0";
41090 if (STACK_TOP_P (operands[0]))
41091 return "fld%Z1\t%y1";
41092 return "fst\t%y0";
41094 else if (MEM_P (operands[0]))
41096 gcc_assert (REG_P (operands[1]));
41097 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41098 return "fstp%Z0\t%y0";
41099 else
41101 /* There is no non-popping store to memory for XFmode.
41102 So if we need one, follow the store with a load. */
41103 if (GET_MODE (operands[0]) == XFmode)
41104 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41105 else
41106 return "fst%Z0\t%y0";
41109 else
41110 gcc_unreachable();
41113 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41114 FP status register is set. */
41116 void
41117 ix86_emit_fp_unordered_jump (rtx label)
41119 rtx reg = gen_reg_rtx (HImode);
41120 rtx temp;
41122 emit_insn (gen_x86_fnstsw_1 (reg));
41124 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41126 emit_insn (gen_x86_sahf_1 (reg));
41128 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41129 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41131 else
41133 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41135 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41136 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41139 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41140 gen_rtx_LABEL_REF (VOIDmode, label),
41141 pc_rtx);
41142 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41144 emit_jump_insn (temp);
41145 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41148 /* Output code to perform a log1p XFmode calculation. */
41150 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41152 rtx label1 = gen_label_rtx ();
41153 rtx label2 = gen_label_rtx ();
41155 rtx tmp = gen_reg_rtx (XFmode);
41156 rtx tmp2 = gen_reg_rtx (XFmode);
41157 rtx test;
41159 emit_insn (gen_absxf2 (tmp, op1));
41160 test = gen_rtx_GE (VOIDmode, tmp,
41161 CONST_DOUBLE_FROM_REAL_VALUE (
41162 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41163 XFmode));
41164 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41166 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41167 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41168 emit_jump (label2);
41170 emit_label (label1);
41171 emit_move_insn (tmp, CONST1_RTX (XFmode));
41172 emit_insn (gen_addxf3 (tmp, op1, tmp));
41173 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41174 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41176 emit_label (label2);
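/* Commentary (not in the original source): fyl2xp1 computes
   y * log2 (x + 1) but is specified only for |x| < 1 - sqrt(2)/2, which
   is the 0.29289321881... threshold tested above.  In that range
   op0 = ln(2) * log2 (op1 + 1) = log1p (op1) without the cancellation
   that forming 1 + op1 explicitly would cause for tiny op1; for larger
   |op1| the fallback path uses fyl2x on 1 + op1, where that rounding no
   longer matters.  */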
41179 /* Emit code for round calculation. */
41180 void ix86_emit_i387_round (rtx op0, rtx op1)
41182 enum machine_mode inmode = GET_MODE (op1);
41183 enum machine_mode outmode = GET_MODE (op0);
41184 rtx e1, e2, res, tmp, tmp1, half;
41185 rtx scratch = gen_reg_rtx (HImode);
41186 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41187 rtx jump_label = gen_label_rtx ();
41188 rtx insn;
41189 rtx (*gen_abs) (rtx, rtx);
41190 rtx (*gen_neg) (rtx, rtx);
41192 switch (inmode)
41194 case SFmode:
41195 gen_abs = gen_abssf2;
41196 break;
41197 case DFmode:
41198 gen_abs = gen_absdf2;
41199 break;
41200 case XFmode:
41201 gen_abs = gen_absxf2;
41202 break;
41203 default:
41204 gcc_unreachable ();
41207 switch (outmode)
41209 case SFmode:
41210 gen_neg = gen_negsf2;
41211 break;
41212 case DFmode:
41213 gen_neg = gen_negdf2;
41214 break;
41215 case XFmode:
41216 gen_neg = gen_negxf2;
41217 break;
41218 case HImode:
41219 gen_neg = gen_neghi2;
41220 break;
41221 case SImode:
41222 gen_neg = gen_negsi2;
41223 break;
41224 case DImode:
41225 gen_neg = gen_negdi2;
41226 break;
41227 default:
41228 gcc_unreachable ();
41231 e1 = gen_reg_rtx (inmode);
41232 e2 = gen_reg_rtx (inmode);
41233 res = gen_reg_rtx (outmode);
41235 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41237 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41239 /* scratch = fxam(op1) */
41240 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41241 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41242 UNSPEC_FXAM)));
41243 /* e1 = fabs(op1) */
41244 emit_insn (gen_abs (e1, op1));
41246 /* e2 = e1 + 0.5 */
41247 half = force_reg (inmode, half);
41248 emit_insn (gen_rtx_SET (VOIDmode, e2,
41249 gen_rtx_PLUS (inmode, e1, half)));
41251 /* res = floor(e2) */
41252 if (inmode != XFmode)
41254 tmp1 = gen_reg_rtx (XFmode);
41256 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41257 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41259 else
41260 tmp1 = e2;
41262 switch (outmode)
41264 case SFmode:
41265 case DFmode:
41267 rtx tmp0 = gen_reg_rtx (XFmode);
41269 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41271 emit_insn (gen_rtx_SET (VOIDmode, res,
41272 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41273 UNSPEC_TRUNC_NOOP)));
41275 break;
41276 case XFmode:
41277 emit_insn (gen_frndintxf2_floor (res, tmp1));
41278 break;
41279 case HImode:
41280 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41281 break;
41282 case SImode:
41283 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41284 break;
41285 case DImode:
41286 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41287 break;
41288 default:
41289 gcc_unreachable ();
41292 /* flags = signbit(a) */
41293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41295 /* if (flags) then res = -res */
41296 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41297 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41298 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41299 pc_rtx);
41300 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41301 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41302 JUMP_LABEL (insn) = jump_label;
41304 emit_insn (gen_neg (res, res));
41306 emit_label (jump_label);
41307 LABEL_NUSES (jump_label) = 1;
41309 emit_move_insn (op0, res);
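/* Commentary (not in the original source): fxam reports the sign of
   ST(0) in condition flag C1, which is bit 9 of the FPU status word and
   therefore bit 0x02 of the high byte that fnstsw stored into SCRATCH;
   that is the bit the testqi_ext pattern above checks before negating
   RES.  Using fxam instead of a comparison against 0.0 keeps the correct
   sign for -0.0 and for negative inputs that round to zero.  */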
41312 /* Output code to perform a Newton-Raphson approximation of a single precision
41313 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41315 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41317 rtx x0, x1, e0, e1;
41319 x0 = gen_reg_rtx (mode);
41320 e0 = gen_reg_rtx (mode);
41321 e1 = gen_reg_rtx (mode);
41322 x1 = gen_reg_rtx (mode);
41324 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41326 b = force_reg (mode, b);
41328 /* x0 = rcp(b) estimate */
41329 if (mode == V16SFmode || mode == V8DFmode)
41330 emit_insn (gen_rtx_SET (VOIDmode, x0,
41331 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41332 UNSPEC_RCP14)));
41333 else
41334 emit_insn (gen_rtx_SET (VOIDmode, x0,
41335 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41336 UNSPEC_RCP)));
41338 /* e0 = x0 * b */
41339 emit_insn (gen_rtx_SET (VOIDmode, e0,
41340 gen_rtx_MULT (mode, x0, b)));
41342 /* e0 = x0 * e0 */
41343 emit_insn (gen_rtx_SET (VOIDmode, e0,
41344 gen_rtx_MULT (mode, x0, e0)));
41346 /* e1 = x0 + x0 */
41347 emit_insn (gen_rtx_SET (VOIDmode, e1,
41348 gen_rtx_PLUS (mode, x0, x0)));
41350 /* x1 = e1 - e0 */
41351 emit_insn (gen_rtx_SET (VOIDmode, x1,
41352 gen_rtx_MINUS (mode, e1, e0)));
41354 /* res = a * x1 */
41355 emit_insn (gen_rtx_SET (VOIDmode, res,
41356 gen_rtx_MULT (mode, a, x1)));
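/* Commentary (not in the original source): the sequence above is one
   Newton-Raphson refinement of the hardware reciprocal estimate.  With
   x0 = rcp(b) ~ 1/b,

     x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0 = e1 - e0

   which roughly squares the relative error of the estimate (about 2^-12
   for rcpps, so ~2^-24 after one step), and a final multiply by A gives
   the approximate quotient.  */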
41359 /* Output code to perform a Newton-Raphson approximation of a
41360 single precision floating point [reciprocal] square root. */
41362 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41363 bool recip)
41365 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41366 REAL_VALUE_TYPE r;
41367 int unspec;
41369 x0 = gen_reg_rtx (mode);
41370 e0 = gen_reg_rtx (mode);
41371 e1 = gen_reg_rtx (mode);
41372 e2 = gen_reg_rtx (mode);
41373 e3 = gen_reg_rtx (mode);
41375 real_from_integer (&r, VOIDmode, -3, -1, 0);
41376 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41378 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41379 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41380 unspec = UNSPEC_RSQRT;
41382 if (VECTOR_MODE_P (mode))
41384 mthree = ix86_build_const_vector (mode, true, mthree);
41385 mhalf = ix86_build_const_vector (mode, true, mhalf);
41386 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41387 if (GET_MODE_SIZE (mode) == 64)
41388 unspec = UNSPEC_RSQRT14;
41391 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41392 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41394 a = force_reg (mode, a);
41396 /* x0 = rsqrt(a) estimate */
41397 emit_insn (gen_rtx_SET (VOIDmode, x0,
41398 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41399 unspec)));
41401 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt(0.0). */
41402 if (!recip)
41404 rtx zero, mask;
41406 zero = gen_reg_rtx (mode);
41407 mask = gen_reg_rtx (mode);
41409 zero = force_reg (mode, CONST0_RTX(mode));
41411 /* Handle masked compare. */
41412 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41414 mask = gen_reg_rtx (HImode);
41415 /* Imm value 0x4 corresponds to not-equal comparison. */
41416 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41417 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41419 else
41421 emit_insn (gen_rtx_SET (VOIDmode, mask,
41422 gen_rtx_NE (mode, zero, a)));
41424 emit_insn (gen_rtx_SET (VOIDmode, x0,
41425 gen_rtx_AND (mode, x0, mask)));
41429 /* e0 = x0 * a */
41430 emit_insn (gen_rtx_SET (VOIDmode, e0,
41431 gen_rtx_MULT (mode, x0, a)));
41432 /* e1 = e0 * x0 */
41433 emit_insn (gen_rtx_SET (VOIDmode, e1,
41434 gen_rtx_MULT (mode, e0, x0)));
41436 /* e2 = e1 - 3. */
41437 mthree = force_reg (mode, mthree);
41438 emit_insn (gen_rtx_SET (VOIDmode, e2,
41439 gen_rtx_PLUS (mode, e1, mthree)));
41441 mhalf = force_reg (mode, mhalf);
41442 if (recip)
41443 /* e3 = -.5 * x0 */
41444 emit_insn (gen_rtx_SET (VOIDmode, e3,
41445 gen_rtx_MULT (mode, x0, mhalf)));
41446 else
41447 /* e3 = -.5 * e0 */
41448 emit_insn (gen_rtx_SET (VOIDmode, e3,
41449 gen_rtx_MULT (mode, e0, mhalf)));
41450 /* ret = e2 * e3 */
41451 emit_insn (gen_rtx_SET (VOIDmode, res,
41452 gen_rtx_MULT (mode, e2, e3)));
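/* Commentary (not in the original source): this is the Newton-Raphson
   step for the reciprocal square root.  With x0 = rsqrt(a) ~ 1/sqrt(a),

     rsqrt:  x1     = -0.5 * x0 * (a * x0 * x0 - 3)  =  e3 * e2
     sqrt:   a * x1 = -0.5 * (a * x0) * (a * x0 * x0 - 3)

   matching e0 = a*x0, e1 = e0*x0, e2 = e1 - 3 and e3 = -0.5 * x0 (or
   -0.5 * e0).  The earlier masking of x0 with (a != 0) turns the infinite
   rsqrt(0) estimate into 0 so that sqrt(0.0) does not end up as
   0 * inf = NaN.  */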
41455 #ifdef TARGET_SOLARIS
41456 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41458 static void
41459 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41460 tree decl)
41462 /* With Binutils 2.15, the "@unwind" marker must be specified on
41463 every occurrence of the ".eh_frame" section, not just the first
41464 one. */
41465 if (TARGET_64BIT
41466 && strcmp (name, ".eh_frame") == 0)
41468 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41469 flags & SECTION_WRITE ? "aw" : "a");
41470 return;
41473 #ifndef USE_GAS
41474 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41476 solaris_elf_asm_comdat_section (name, flags, decl);
41477 return;
41479 #endif
41481 default_elf_asm_named_section (name, flags, decl);
41483 #endif /* TARGET_SOLARIS */
41485 /* Return the mangling of TYPE if it is an extended fundamental type. */
41487 static const char *
41488 ix86_mangle_type (const_tree type)
41490 type = TYPE_MAIN_VARIANT (type);
41492 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41493 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41494 return NULL;
41496 switch (TYPE_MODE (type))
41498 case TFmode:
41499 /* __float128 is "g". */
41500 return "g";
41501 case XFmode:
41502 /* "long double" or __float80 is "e". */
41503 return "e";
41504 default:
41505 return NULL;
41509 /* For 32-bit code we can save PIC register setup by using
41510 __stack_chk_fail_local hidden function instead of calling
41511 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41512 register, so it is better to call __stack_chk_fail directly. */
41514 static tree ATTRIBUTE_UNUSED
41515 ix86_stack_protect_fail (void)
41517 return TARGET_64BIT
41518 ? default_external_stack_protect_fail ()
41519 : default_hidden_stack_protect_fail ();
41522 /* Select a format to encode pointers in exception handling data. CODE
41523 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41524 true if the symbol may be affected by dynamic relocations.
41526 ??? All x86 object file formats are capable of representing this.
41527 After all, the relocation needed is the same as for the call insn.
41528 Whether or not a particular assembler allows us to enter such, I
41529 guess we'll have to see. */
41531 asm_preferred_eh_data_format (int code, int global)
41533 if (flag_pic)
41535 int type = DW_EH_PE_sdata8;
41536 if (!TARGET_64BIT
41537 || ix86_cmodel == CM_SMALL_PIC
41538 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41539 type = DW_EH_PE_sdata4;
41540 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41542 if (ix86_cmodel == CM_SMALL
41543 || (ix86_cmodel == CM_MEDIUM && code))
41544 return DW_EH_PE_udata4;
41545 return DW_EH_PE_absptr;
41548 /* Expand copysign from SIGN to the positive value ABS_VALUE
41549 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41550 the sign-bit. */
41551 static void
41552 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41554 enum machine_mode mode = GET_MODE (sign);
41555 rtx sgn = gen_reg_rtx (mode);
41556 if (mask == NULL_RTX)
41558 enum machine_mode vmode;
41560 if (mode == SFmode)
41561 vmode = V4SFmode;
41562 else if (mode == DFmode)
41563 vmode = V2DFmode;
41564 else
41565 vmode = mode;
41567 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41568 if (!VECTOR_MODE_P (mode))
41570 /* We need to generate a scalar mode mask in this case. */
41571 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41572 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41573 mask = gen_reg_rtx (mode);
41574 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41577 else
41578 mask = gen_rtx_NOT (mode, mask);
41579 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41580 gen_rtx_AND (mode, mask, sign)));
41581 emit_insn (gen_rtx_SET (VOIDmode, result,
41582 gen_rtx_IOR (mode, abs_value, sgn)));
41585 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41586 mask for masking out the sign-bit is stored in *SMASK, if that is
41587 non-null. */
41588 static rtx
41589 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41591 enum machine_mode vmode, mode = GET_MODE (op0);
41592 rtx xa, mask;
41594 xa = gen_reg_rtx (mode);
41595 if (mode == SFmode)
41596 vmode = V4SFmode;
41597 else if (mode == DFmode)
41598 vmode = V2DFmode;
41599 else
41600 vmode = mode;
41601 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41602 if (!VECTOR_MODE_P (mode))
41604 /* We need to generate a scalar mode mask in this case. */
41605 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41606 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41607 mask = gen_reg_rtx (mode);
41608 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41610 emit_insn (gen_rtx_SET (VOIDmode, xa,
41611 gen_rtx_AND (mode, op0, mask)));
41613 if (smask)
41614 *smask = mask;
41616 return xa;
41619 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41620 swapping the operands if SWAP_OPERANDS is true. The expanded
41621 code is a forward jump to a newly created label in case the
41622 comparison is true. The generated label rtx is returned. */
41623 static rtx
41624 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41625 bool swap_operands)
41627 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41628 rtx label, tmp;
41630 if (swap_operands)
41632 tmp = op0;
41633 op0 = op1;
41634 op1 = tmp;
41637 label = gen_label_rtx ();
41638 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41639 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41640 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41641 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41642 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41643 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41644 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41645 JUMP_LABEL (tmp) = label;
41647 return label;
41650 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41651 using comparison code CODE. Operands are swapped for the comparison if
41652 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41653 static rtx
41654 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41655 bool swap_operands)
41657 rtx (*insn)(rtx, rtx, rtx, rtx);
41658 enum machine_mode mode = GET_MODE (op0);
41659 rtx mask = gen_reg_rtx (mode);
41661 if (swap_operands)
41663 rtx tmp = op0;
41664 op0 = op1;
41665 op1 = tmp;
41668 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41670 emit_insn (insn (mask, op0, op1,
41671 gen_rtx_fmt_ee (code, mode, op0, op1)));
41672 return mask;
41675 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41676 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41677 static rtx
41678 ix86_gen_TWO52 (enum machine_mode mode)
41680 REAL_VALUE_TYPE TWO52r;
41681 rtx TWO52;
41683 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41684 TWO52 = const_double_from_real_value (TWO52r, mode);
41685 TWO52 = force_reg (mode, TWO52);
41687 return TWO52;
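/* Commentary (not in the original source): TWO52 is 2^52 for DFmode and
   2^23 for SFmode, i.e. the point where the significand runs out of
   fraction bits.  For 0 <= x < 2^52 the double sum x + 2^52 lies in
   [2^52, 2^53), where consecutive doubles are exactly 1 apart, so the
   addition itself rounds x to the nearest integer and subtracting 2^52
   again leaves that integer: 3.7 + 2^52 - 2^52 == 4.0.  This is why the
   expanders below first take fabs and guard the trick with
   isless (xa, TWO52).  */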
41690 /* Expand SSE sequence for computing lround from OP1 storing
41691 into OP0. */
41692 void
41693 ix86_expand_lround (rtx op0, rtx op1)
41695 /* C code for the stuff we're doing below:
41696 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41697 return (long)tmp;
41699 enum machine_mode mode = GET_MODE (op1);
41700 const struct real_format *fmt;
41701 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41702 rtx adj;
41704 /* load nextafter (0.5, 0.0) */
41705 fmt = REAL_MODE_FORMAT (mode);
41706 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41707 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41709 /* adj = copysign (0.5, op1) */
41710 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41711 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41713 /* adj = op1 + adj */
41714 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41716 /* op0 = (imode)adj */
41717 expand_fix (op0, adj, 0);
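/* Commentary (not in the original source): the addend is
   nextafter (0.5, 0.0), the largest double below 0.5, rather than 0.5
   itself.  With plain 0.5, an input like 0.49999999999999994 would give
   0.49999999999999994 + 0.5 == 1.0 after rounding and lround would
   return 1; with the slightly smaller addend the sum stays below 1.0 and
   the fix conversion truncates it to 0, while an exact half such as 0.5
   still sums to 1 - 2^-54, which rounds up to 1.0 as desired.  */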
41720 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41721 into OPERAND0. */
41722 void
41723 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41725 /* C code for the stuff we're doing below (for do_floor):
41726 xi = (long)op1;
41727 xi -= (double)xi > op1 ? 1 : 0;
41728 return xi;
41730 enum machine_mode fmode = GET_MODE (op1);
41731 enum machine_mode imode = GET_MODE (op0);
41732 rtx ireg, freg, label, tmp;
41734 /* reg = (long)op1 */
41735 ireg = gen_reg_rtx (imode);
41736 expand_fix (ireg, op1, 0);
41738 /* freg = (double)reg */
41739 freg = gen_reg_rtx (fmode);
41740 expand_float (freg, ireg, 0);
41742 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41743 label = ix86_expand_sse_compare_and_jump (UNLE,
41744 freg, op1, !do_floor);
41745 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41746 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41747 emit_move_insn (ireg, tmp);
41749 emit_label (label);
41750 LABEL_NUSES (label) = 1;
41752 emit_move_insn (op0, ireg);
41755 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41756 result in OPERAND0. */
41757 void
41758 ix86_expand_rint (rtx operand0, rtx operand1)
41760 /* C code for the stuff we're doing below:
41761 xa = fabs (operand1);
41762 if (!isless (xa, 2**52))
41763 return operand1;
41764 xa = xa + 2**52 - 2**52;
41765 return copysign (xa, operand1);
41767 enum machine_mode mode = GET_MODE (operand0);
41768 rtx res, xa, label, TWO52, mask;
41770 res = gen_reg_rtx (mode);
41771 emit_move_insn (res, operand1);
41773 /* xa = abs (operand1) */
41774 xa = ix86_expand_sse_fabs (res, &mask);
41776 /* if (!isless (xa, TWO52)) goto label; */
41777 TWO52 = ix86_gen_TWO52 (mode);
41778 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41780 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41781 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41783 ix86_sse_copysign_to_positive (res, xa, res, mask);
41785 emit_label (label);
41786 LABEL_NUSES (label) = 1;
41788 emit_move_insn (operand0, res);
41791 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41792 into OPERAND0. */
41793 void
41794 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41796 /* C code for the stuff we expand below.
41797 double xa = fabs (x), x2;
41798 if (!isless (xa, TWO52))
41799 return x;
41800 xa = xa + TWO52 - TWO52;
41801 x2 = copysign (xa, x);
41802 Compensate. Floor:
41803 if (x2 > x)
41804 x2 -= 1;
41805 Compensate. Ceil:
41806 if (x2 < x)
41807 x2 -= -1;
41808 return x2;
41810 enum machine_mode mode = GET_MODE (operand0);
41811 rtx xa, TWO52, tmp, label, one, res, mask;
41813 TWO52 = ix86_gen_TWO52 (mode);
41815 /* Temporary for holding the result, initialized to the input
41816 operand to ease control flow. */
41817 res = gen_reg_rtx (mode);
41818 emit_move_insn (res, operand1);
41820 /* xa = abs (operand1) */
41821 xa = ix86_expand_sse_fabs (res, &mask);
41823 /* if (!isless (xa, TWO52)) goto label; */
41824 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41826 /* xa = xa + TWO52 - TWO52; */
41827 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41828 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41830 /* xa = copysign (xa, operand1) */
41831 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41833 /* generate 1.0 or -1.0 */
41834 one = force_reg (mode,
41835 const_double_from_real_value (do_floor
41836 ? dconst1 : dconstm1, mode));
41838 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41839 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41840 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41841 gen_rtx_AND (mode, one, tmp)));
41842 /* We always need to subtract here to preserve signed zero. */
41843 tmp = expand_simple_binop (mode, MINUS,
41844 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41845 emit_move_insn (res, tmp);
41847 emit_label (label);
41848 LABEL_NUSES (label) = 1;
41850 emit_move_insn (operand0, res);
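/* Commentary (not in the original source): the compensation step is
   branch-free.  ix86_expand_sse_compare_mask yields an all-ones or
   all-zeros bit pattern, so ANDing it with ONE gives either 1.0 (-1.0
   for ceil) or 0.0, and the unconditional subtraction applies the
   correction only when the comparison held.  Both directions are written
   as a subtraction because -0.0 - 0.0 is still -0.0 while -0.0 + 0.0
   would be +0.0, i.e. the signed zero of the input survives the no-op
   case.  */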
41853 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41854 into OPERAND0. */
41855 void
41856 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41858 /* C code for the stuff we expand below.
41859 double xa = fabs (x), x2;
41860 if (!isless (xa, TWO52))
41861 return x;
41862 x2 = (double)(long)x;
41863 Compensate. Floor:
41864 if (x2 > x)
41865 x2 -= 1;
41866 Compensate. Ceil:
41867 if (x2 < x)
41868 x2 += 1;
41869 if (HONOR_SIGNED_ZEROS (mode))
41870 return copysign (x2, x);
41871 return x2;
41873 enum machine_mode mode = GET_MODE (operand0);
41874 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41876 TWO52 = ix86_gen_TWO52 (mode);
41878 /* Temporary for holding the result, initialized to the input
41879 operand to ease control flow. */
41880 res = gen_reg_rtx (mode);
41881 emit_move_insn (res, operand1);
41883 /* xa = abs (operand1) */
41884 xa = ix86_expand_sse_fabs (res, &mask);
41886 /* if (!isless (xa, TWO52)) goto label; */
41887 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41889 /* xa = (double)(long)x */
41890 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41891 expand_fix (xi, res, 0);
41892 expand_float (xa, xi, 0);
41894 /* generate 1.0 */
41895 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41897 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41898 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41899 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41900 gen_rtx_AND (mode, one, tmp)));
41901 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
41902 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41903 emit_move_insn (res, tmp);
41905 if (HONOR_SIGNED_ZEROS (mode))
41906 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
41908 emit_label (label);
41909 LABEL_NUSES (label) = 1;
41911 emit_move_insn (operand0, res);
41914 /* Expand SSE sequence for computing round from OPERAND1 storing
41915 into OPERAND0. Sequence that works without relying on DImode truncation
41916 via cvttsd2siq that is only available on 64bit targets. */
41917 void
41918 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
41920 /* C code for the stuff we expand below.
41921 double xa = fabs (x), xa2, x2;
41922 if (!isless (xa, TWO52))
41923 return x;
41924 Using the absolute value and copying back sign makes
41925 -0.0 -> -0.0 correct.
41926 xa2 = xa + TWO52 - TWO52;
41927 Compensate.
41928 dxa = xa2 - xa;
41929 if (dxa <= -0.5)
41930 xa2 += 1;
41931 else if (dxa > 0.5)
41932 xa2 -= 1;
41933 x2 = copysign (xa2, x);
41934 return x2;
41936 enum machine_mode mode = GET_MODE (operand0);
41937 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
41939 TWO52 = ix86_gen_TWO52 (mode);
41941 /* Temporary for holding the result, initialized to the input
41942 operand to ease control flow. */
41943 res = gen_reg_rtx (mode);
41944 emit_move_insn (res, operand1);
41946 /* xa = abs (operand1) */
41947 xa = ix86_expand_sse_fabs (res, &mask);
41949 /* if (!isless (xa, TWO52)) goto label; */
41950 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41952 /* xa2 = xa + TWO52 - TWO52; */
41953 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41954 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
41956 /* dxa = xa2 - xa; */
41957 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
41959 /* generate 0.5, 1.0 and -0.5 */
41960 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
41961 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
41962 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
41963 0, OPTAB_DIRECT);
41965 /* Compensate. */
41966 tmp = gen_reg_rtx (mode);
41967 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
41968 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
41969 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41970 gen_rtx_AND (mode, one, tmp)));
41971 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41972 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
41973 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
41974 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41975 gen_rtx_AND (mode, one, tmp)));
41976 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41978 /* res = copysign (xa2, operand1) */
41979 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
41981 emit_label (label);
41982 LABEL_NUSES (label) = 1;
41984 emit_move_insn (operand0, res);
41987 /* Expand SSE sequence for computing trunc from OPERAND1 storing
41988 into OPERAND0. */
41989 void
41990 ix86_expand_trunc (rtx operand0, rtx operand1)
41992 /* C code for SSE variant we expand below.
41993 double xa = fabs (x), x2;
41994 if (!isless (xa, TWO52))
41995 return x;
41996 x2 = (double)(long)x;
41997 if (HONOR_SIGNED_ZEROS (mode))
41998 return copysign (x2, x);
41999 return x2;
42001 enum machine_mode mode = GET_MODE (operand0);
42002 rtx xa, xi, TWO52, label, res, mask;
42004 TWO52 = ix86_gen_TWO52 (mode);
42006 /* Temporary for holding the result, initialized to the input
42007 operand to ease control flow. */
42008 res = gen_reg_rtx (mode);
42009 emit_move_insn (res, operand1);
42011 /* xa = abs (operand1) */
42012 xa = ix86_expand_sse_fabs (res, &mask);
42014 /* if (!isless (xa, TWO52)) goto label; */
42015 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42017 /* x = (double)(long)x */
42018 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42019 expand_fix (xi, res, 0);
42020 expand_float (res, xi, 0);
42022 if (HONOR_SIGNED_ZEROS (mode))
42023 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42025 emit_label (label);
42026 LABEL_NUSES (label) = 1;
42028 emit_move_insn (operand0, res);
42031 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42032 into OPERAND0. */
42033 void
42034 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42036 enum machine_mode mode = GET_MODE (operand0);
42037 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42039 /* C code for SSE variant we expand below.
42040 double xa = fabs (x), x2;
42041 if (!isless (xa, TWO52))
42042 return x;
42043 xa2 = xa + TWO52 - TWO52;
42044 Compensate:
42045 if (xa2 > xa)
42046 xa2 -= 1.0;
42047 x2 = copysign (xa2, x);
42048 return x2;
42051 TWO52 = ix86_gen_TWO52 (mode);
42053 /* Temporary for holding the result, initialized to the input
42054 operand to ease control flow. */
42055 res = gen_reg_rtx (mode);
42056 emit_move_insn (res, operand1);
42058 /* xa = abs (operand1) */
42059 xa = ix86_expand_sse_fabs (res, &smask);
42061 /* if (!isless (xa, TWO52)) goto label; */
42062 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42064 /* res = xa + TWO52 - TWO52; */
42065 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42066 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42067 emit_move_insn (res, tmp);
42069 /* generate 1.0 */
42070 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42072 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42073 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42074 emit_insn (gen_rtx_SET (VOIDmode, mask,
42075 gen_rtx_AND (mode, mask, one)));
42076 tmp = expand_simple_binop (mode, MINUS,
42077 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42078 emit_move_insn (res, tmp);
42080 /* res = copysign (res, operand1) */
42081 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42083 emit_label (label);
42084 LABEL_NUSES (label) = 1;
42086 emit_move_insn (operand0, res);
42089 /* Expand SSE sequence for computing round from OPERAND1 storing
42090 into OPERAND0. */
42091 void
42092 ix86_expand_round (rtx operand0, rtx operand1)
42094 /* C code for the stuff we're doing below:
42095 double xa = fabs (x);
42096 if (!isless (xa, TWO52))
42097 return x;
42098 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42099 return copysign (xa, x);
42101 enum machine_mode mode = GET_MODE (operand0);
42102 rtx res, TWO52, xa, label, xi, half, mask;
42103 const struct real_format *fmt;
42104 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42106 /* Temporary for holding the result, initialized to the input
42107 operand to ease control flow. */
42108 res = gen_reg_rtx (mode);
42109 emit_move_insn (res, operand1);
42111 TWO52 = ix86_gen_TWO52 (mode);
42112 xa = ix86_expand_sse_fabs (res, &mask);
42113 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42115 /* load nextafter (0.5, 0.0) */
42116 fmt = REAL_MODE_FORMAT (mode);
42117 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42118 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
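/* Using nextafter (0.5, 0.0) instead of 0.5 keeps inputs just below 0.5
from rounding up: for the largest double below 0.5, x + 0.5 rounds to
1.0 and would truncate to 1, while x + nextafter (0.5, 0.0) stays
below 1.0 and truncates to 0. */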
42120 /* xa = xa + 0.5 */
42121 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42122 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42124 /* xa = (double)(int64_t)xa */
42125 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42126 expand_fix (xi, xa, 0);
42127 expand_float (xa, xi, 0);
42129 /* res = copysign (xa, operand1) */
42130 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42132 emit_label (label);
42133 LABEL_NUSES (label) = 1;
42135 emit_move_insn (operand0, res);
42138 /* Expand SSE sequence for computing round
42139 from OP1 storing into OP0 using sse4 round insn. */
42140 void
42141 ix86_expand_round_sse4 (rtx op0, rtx op1)
42143 enum machine_mode mode = GET_MODE (op0);
42144 rtx e1, e2, res, half;
42145 const struct real_format *fmt;
42146 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42147 rtx (*gen_copysign) (rtx, rtx, rtx);
42148 rtx (*gen_round) (rtx, rtx, rtx);
42150 switch (mode)
42152 case SFmode:
42153 gen_copysign = gen_copysignsf3;
42154 gen_round = gen_sse4_1_roundsf2;
42155 break;
42156 case DFmode:
42157 gen_copysign = gen_copysigndf3;
42158 gen_round = gen_sse4_1_rounddf2;
42159 break;
42160 default:
42161 gcc_unreachable ();
42164 /* round (a) = trunc (a + copysign (0.5, a)) */
42166 /* load nextafter (0.5, 0.0) */
42167 fmt = REAL_MODE_FORMAT (mode);
42168 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42169 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42170 half = const_double_from_real_value (pred_half, mode);
42172 /* e1 = copysign (0.5, op1) */
42173 e1 = gen_reg_rtx (mode);
42174 emit_insn (gen_copysign (e1, half, op1));
42176 /* e2 = op1 + e1 */
42177 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42179 /* res = trunc (e2) */
42180 res = gen_reg_rtx (mode);
42181 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42183 emit_move_insn (op0, res);
42187 /* Table of valid machine attributes. */
42188 static const struct attribute_spec ix86_attribute_table[] =
42190 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42191 affects_type_identity } */
42192 /* Stdcall attribute says callee is responsible for popping arguments
42193 if they are not variable. */
42194 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42195 true },
42196 /* Fastcall attribute says callee is responsible for popping arguments
42197 if they are not variable. */
42198 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42199 true },
42200 /* Thiscall attribute says callee is responsible for popping arguments
42201 if they are not variable. */
42202 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42203 true },
42204 /* Cdecl attribute says the callee is a normal C declaration */
42205 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42206 true },
42207 /* Regparm attribute specifies how many integer arguments are to be
42208 passed in registers. */
42209 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42210 true },
42211 /* Sseregparm attribute says we are using x86_64 calling conventions
42212 for FP arguments. */
42213 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42214 true },
42215 /* The transactional memory builtins are implicitly regparm or fastcall
42216 depending on the ABI. Override the generic do-nothing attribute that
42217 these builtins were declared with. */
42218 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42219 true },
42220 /* force_align_arg_pointer says this function realigns the stack at entry. */
42221 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42222 false, true, true, ix86_handle_cconv_attribute, false },
42223 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42224 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42225 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42226 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42227 false },
42228 #endif
42229 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42230 false },
42231 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42232 false },
42233 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42234 SUBTARGET_ATTRIBUTE_TABLE,
42235 #endif
42236 /* ms_abi and sysv_abi calling convention function attributes. */
42237 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42238 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42239 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42240 false },
42241 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42242 ix86_handle_callee_pop_aggregate_return, true },
42243 /* End element. */
42244 { NULL, 0, 0, false, false, false, NULL, false }
42247 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42248 static int
42249 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42250 tree vectype,
42251 int misalign ATTRIBUTE_UNUSED)
42253 unsigned elements;
42255 switch (type_of_cost)
42257 case scalar_stmt:
42258 return ix86_cost->scalar_stmt_cost;
42260 case scalar_load:
42261 return ix86_cost->scalar_load_cost;
42263 case scalar_store:
42264 return ix86_cost->scalar_store_cost;
42266 case vector_stmt:
42267 return ix86_cost->vec_stmt_cost;
42269 case vector_load:
42270 return ix86_cost->vec_align_load_cost;
42272 case vector_store:
42273 return ix86_cost->vec_store_cost;
42275 case vec_to_scalar:
42276 return ix86_cost->vec_to_scalar_cost;
42278 case scalar_to_vec:
42279 return ix86_cost->scalar_to_vec_cost;
42281 case unaligned_load:
42282 case unaligned_store:
42283 return ix86_cost->vec_unalign_load_cost;
42285 case cond_branch_taken:
42286 return ix86_cost->cond_taken_branch_cost;
42288 case cond_branch_not_taken:
42289 return ix86_cost->cond_not_taken_branch_cost;
42291 case vec_perm:
42292 case vec_promote_demote:
42293 return ix86_cost->vec_stmt_cost;
42295 case vec_construct:
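/* Building a vector from scalar elements is costed at roughly one
unit per pair of elements, plus one. */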
42296 elements = TYPE_VECTOR_SUBPARTS (vectype);
42297 return elements / 2 + 1;
42299 default:
42300 gcc_unreachable ();
42304 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42305 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42306 insn every time. */
42308 static GTY(()) rtx vselect_insn;
42310 /* Initialize vselect_insn. */
42312 static void
42313 init_vselect_insn (void)
42315 unsigned i;
42316 rtx x;
42318 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42319 for (i = 0; i < MAX_VECT_LEN; ++i)
42320 XVECEXP (x, 0, i) = const0_rtx;
42321 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42322 const0_rtx), x);
42323 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42324 start_sequence ();
42325 vselect_insn = emit_insn (x);
42326 end_sequence ();
42329 /* Construct (set target (vec_select op0 (parallel perm))) and
42330 return true if that's a valid instruction in the active ISA. */
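/* The cached insn's selector, source and destination are overwritten
with the requested permutation, recog_memoized then decides whether
some pattern in the machine description accepts it, and the cached
insn is restored before returning so it can be reused. */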
42332 static bool
42333 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42334 unsigned nelt, bool testing_p)
42336 unsigned int i;
42337 rtx x, save_vconcat;
42338 int icode;
42340 if (vselect_insn == NULL_RTX)
42341 init_vselect_insn ();
42343 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42344 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42345 for (i = 0; i < nelt; ++i)
42346 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42347 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42348 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42349 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42350 SET_DEST (PATTERN (vselect_insn)) = target;
42351 icode = recog_memoized (vselect_insn);
42353 if (icode >= 0 && !testing_p)
42354 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42356 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42357 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42358 INSN_CODE (vselect_insn) = -1;
42360 return icode >= 0;
42363 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42365 static bool
42366 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42367 const unsigned char *perm, unsigned nelt,
42368 bool testing_p)
42370 enum machine_mode v2mode;
42371 rtx x;
42372 bool ok;
42374 if (vselect_insn == NULL_RTX)
42375 init_vselect_insn ();
42377 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42378 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42379 PUT_MODE (x, v2mode);
42380 XEXP (x, 0) = op0;
42381 XEXP (x, 1) = op1;
42382 ok = expand_vselect (target, x, perm, nelt, testing_p);
42383 XEXP (x, 0) = const0_rtx;
42384 XEXP (x, 1) = const0_rtx;
42385 return ok;
42388 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42389 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
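/* For example, a V4SFmode permutation { 0, 5, 2, 7 } takes elements
alternately from op0 and op1 without moving any of them, so it maps
to a blend with immediate mask 0b1010 (bit i set when element i
comes from op1). */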
42391 static bool
42392 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42394 enum machine_mode vmode = d->vmode;
42395 unsigned i, mask, nelt = d->nelt;
42396 rtx target, op0, op1, x;
42397 rtx rperm[32], vperm;
42399 if (d->one_operand_p)
42400 return false;
42401 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42403 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42405 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42407 else
42408 return false;
42410 /* This is a blend, not a permute. Elements must stay in their
42411 respective lanes. */
42412 for (i = 0; i < nelt; ++i)
42414 unsigned e = d->perm[i];
42415 if (!(e == i || e == i + nelt))
42416 return false;
42419 if (d->testing_p)
42420 return true;
42422 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42423 decision should be extracted elsewhere, so that we only try that
42424 sequence once all budget==3 options have been tried. */
42425 target = d->target;
42426 op0 = d->op0;
42427 op1 = d->op1;
42428 mask = 0;
42430 switch (vmode)
42432 case V4DFmode:
42433 case V8SFmode:
42434 case V2DFmode:
42435 case V4SFmode:
42436 case V8HImode:
42437 case V8SImode:
42438 for (i = 0; i < nelt; ++i)
42439 mask |= (d->perm[i] >= nelt) << i;
42440 break;
42442 case V2DImode:
42443 for (i = 0; i < 2; ++i)
42444 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
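/* Each V2DImode element covers four HImode words, so the one-bit
source choice per element is widened to four mask bits; e.g. perm
{ 0, 3 } becomes the pblendw mask 0xf0 after the V8HImode subreg. */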
42445 vmode = V8HImode;
42446 goto do_subreg;
42448 case V4SImode:
42449 for (i = 0; i < 4; ++i)
42450 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42451 vmode = V8HImode;
42452 goto do_subreg;
42454 case V16QImode:
42455 /* See if bytes move in pairs so we can use pblendw with
42456 an immediate argument, rather than pblendvb with a vector
42457 argument. */
42458 for (i = 0; i < 16; i += 2)
42459 if (d->perm[i] + 1 != d->perm[i + 1])
42461 use_pblendvb:
42462 for (i = 0; i < nelt; ++i)
42463 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42465 finish_pblendvb:
42466 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42467 vperm = force_reg (vmode, vperm);
42469 if (GET_MODE_SIZE (vmode) == 16)
42470 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42471 else
42472 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42473 if (target != d->target)
42474 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42475 return true;
42478 for (i = 0; i < 8; ++i)
42479 mask |= (d->perm[i * 2] >= 16) << i;
42480 vmode = V8HImode;
42481 /* FALLTHRU */
42483 do_subreg:
42484 target = gen_reg_rtx (vmode);
42485 op0 = gen_lowpart (vmode, op0);
42486 op1 = gen_lowpart (vmode, op1);
42487 break;
42489 case V32QImode:
42490 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42491 for (i = 0; i < 32; i += 2)
42492 if (d->perm[i] + 1 != d->perm[i + 1])
42493 goto use_pblendvb;
42494 /* See if bytes move in quadruplets. If yes, vpblendd
42495 with immediate can be used. */
42496 for (i = 0; i < 32; i += 4)
42497 if (d->perm[i] + 2 != d->perm[i + 2])
42498 break;
42499 if (i < 32)
42501 /* See if bytes move the same in both lanes. If yes,
42502 vpblendw with immediate can be used. */
42503 for (i = 0; i < 16; i += 2)
42504 if (d->perm[i] + 16 != d->perm[i + 16])
42505 goto use_pblendvb;
42507 /* Use vpblendw. */
42508 for (i = 0; i < 16; ++i)
42509 mask |= (d->perm[i * 2] >= 32) << i;
42510 vmode = V16HImode;
42511 goto do_subreg;
42514 /* Use vpblendd. */
42515 for (i = 0; i < 8; ++i)
42516 mask |= (d->perm[i * 4] >= 32) << i;
42517 vmode = V8SImode;
42518 goto do_subreg;
42520 case V16HImode:
42521 /* See if words move in pairs. If yes, vpblendd can be used. */
42522 for (i = 0; i < 16; i += 2)
42523 if (d->perm[i] + 1 != d->perm[i + 1])
42524 break;
42525 if (i < 16)
42527 /* See if words move the same in both lanes. If not,
42528 vpblendvb must be used. */
42529 for (i = 0; i < 8; i++)
42530 if (d->perm[i] + 8 != d->perm[i + 8])
42532 /* Use vpblendvb. */
42533 for (i = 0; i < 32; ++i)
42534 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42536 vmode = V32QImode;
42537 nelt = 32;
42538 target = gen_reg_rtx (vmode);
42539 op0 = gen_lowpart (vmode, op0);
42540 op1 = gen_lowpart (vmode, op1);
42541 goto finish_pblendvb;
42544 /* Use vpblendw. */
42545 for (i = 0; i < 16; ++i)
42546 mask |= (d->perm[i] >= 16) << i;
42547 break;
42550 /* Use vpblendd. */
42551 for (i = 0; i < 8; ++i)
42552 mask |= (d->perm[i * 2] >= 16) << i;
42553 vmode = V8SImode;
42554 goto do_subreg;
42556 case V4DImode:
42557 /* Use vpblendd. */
42558 for (i = 0; i < 4; ++i)
42559 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42560 vmode = V8SImode;
42561 goto do_subreg;
42563 default:
42564 gcc_unreachable ();
42567 /* This matches five different patterns with the different modes. */
42568 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42569 x = gen_rtx_SET (VOIDmode, target, x);
42570 emit_insn (x);
42571 if (target != d->target)
42572 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42574 return true;
42577 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42578 in terms of the variable form of vpermilps.
42580 Note that we will have already failed the immediate input vpermilps,
42581 which requires that the high and low part shuffle be identical; the
42582 variable form doesn't require that. */
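/* For example, the V8SFmode permutation { 3, 2, 1, 0, 4, 5, 6, 7 }
stays within its 128-bit lanes but shuffles the two lanes
differently, so the immediate form was rejected; here it becomes
vpermilps with the selector vector { 3, 2, 1, 0, 0, 1, 2, 3 }. */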
42584 static bool
42585 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42587 rtx rperm[8], vperm;
42588 unsigned i;
42590 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42591 return false;
42593 /* We can only permute within the 128-bit lane. */
42594 for (i = 0; i < 8; ++i)
42596 unsigned e = d->perm[i];
42597 if (i < 4 ? e >= 4 : e < 4)
42598 return false;
42601 if (d->testing_p)
42602 return true;
42604 for (i = 0; i < 8; ++i)
42606 unsigned e = d->perm[i];
42608 /* Within each 128-bit lane, the elements of op0 are numbered
42609 from 0 and the elements of op1 are numbered from 4. */
42610 if (e >= 8 + 4)
42611 e -= 8;
42612 else if (e >= 4)
42613 e -= 4;
42615 rperm[i] = GEN_INT (e);
42618 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42619 vperm = force_reg (V8SImode, vperm);
42620 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42622 return true;
42625 /* Return true if permutation D can be performed as VMODE permutation
42626 instead. */
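/* For example, the V16QImode permutation
{ 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }
moves aligned groups of four consecutive bytes, so it is also
expressible as the V4SImode permutation { 1, 0, 3, 2 }. */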
42628 static bool
42629 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42631 unsigned int i, j, chunk;
42633 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42634 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42635 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42636 return false;
42638 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42639 return true;
42641 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42642 for (i = 0; i < d->nelt; i += chunk)
42643 if (d->perm[i] & (chunk - 1))
42644 return false;
42645 else
42646 for (j = 1; j < chunk; ++j)
42647 if (d->perm[i] + j != d->perm[i + j])
42648 return false;
42650 return true;
42653 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42654 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42656 static bool
42657 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42659 unsigned i, nelt, eltsz, mask;
42660 unsigned char perm[32];
42661 enum machine_mode vmode = V16QImode;
42662 rtx rperm[32], vperm, target, op0, op1;
42664 nelt = d->nelt;
42666 if (!d->one_operand_p)
42668 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42670 if (TARGET_AVX2
42671 && valid_perm_using_mode_p (V2TImode, d))
42673 if (d->testing_p)
42674 return true;
42676 /* Use vperm2i128 insn. The pattern uses
42677 V4DImode instead of V2TImode. */
42678 target = d->target;
42679 if (d->vmode != V4DImode)
42680 target = gen_reg_rtx (V4DImode);
42681 op0 = gen_lowpart (V4DImode, d->op0);
42682 op1 = gen_lowpart (V4DImode, d->op1);
42683 rperm[0]
42684 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
42685 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
42686 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42687 if (target != d->target)
42688 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42689 return true;
42691 return false;
42694 else
42696 if (GET_MODE_SIZE (d->vmode) == 16)
42698 if (!TARGET_SSSE3)
42699 return false;
42701 else if (GET_MODE_SIZE (d->vmode) == 32)
42703 if (!TARGET_AVX2)
42704 return false;
42706 /* V4DImode should already be handled through
42707 expand_vselect by the vpermq instruction. */
42708 gcc_assert (d->vmode != V4DImode);
42710 vmode = V32QImode;
42711 if (d->vmode == V8SImode
42712 || d->vmode == V16HImode
42713 || d->vmode == V32QImode)
42715 /* First see if vpermq can be used for
42716 V8SImode/V16HImode/V32QImode. */
42717 if (valid_perm_using_mode_p (V4DImode, d))
42719 for (i = 0; i < 4; i++)
42720 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42721 if (d->testing_p)
42722 return true;
42723 target = gen_reg_rtx (V4DImode);
42724 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42725 perm, 4, false))
42727 emit_move_insn (d->target,
42728 gen_lowpart (d->vmode, target));
42729 return true;
42731 return false;
42734 /* Next see if vpermd can be used. */
42735 if (valid_perm_using_mode_p (V8SImode, d))
42736 vmode = V8SImode;
42738 /* Or if vpermps can be used. */
42739 else if (d->vmode == V8SFmode)
42740 vmode = V8SImode;
42742 if (vmode == V32QImode)
42744 /* vpshufb only shuffles within a 128-bit lane; it is not
42745 possible to move bytes between the lanes. */
42746 for (i = 0; i < nelt; ++i)
42747 if ((d->perm[i] ^ i) & (nelt / 2))
42748 return false;
42751 else
42752 return false;
42755 if (d->testing_p)
42756 return true;
42758 if (vmode == V8SImode)
42759 for (i = 0; i < 8; ++i)
42760 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42761 else
42763 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42764 if (!d->one_operand_p)
42765 mask = 2 * nelt - 1;
42766 else if (vmode == V16QImode)
42767 mask = nelt - 1;
42768 else
42769 mask = nelt / 2 - 1;
42771 for (i = 0; i < nelt; ++i)
42773 unsigned j, e = d->perm[i] & mask;
42774 for (j = 0; j < eltsz; ++j)
42775 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42779 vperm = gen_rtx_CONST_VECTOR (vmode,
42780 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42781 vperm = force_reg (vmode, vperm);
42783 target = d->target;
42784 if (d->vmode != vmode)
42785 target = gen_reg_rtx (vmode);
42786 op0 = gen_lowpart (vmode, d->op0);
42787 if (d->one_operand_p)
42789 if (vmode == V16QImode)
42790 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42791 else if (vmode == V32QImode)
42792 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42793 else if (vmode == V8SFmode)
42794 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42795 else
42796 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42798 else
42800 op1 = gen_lowpart (vmode, d->op1);
42801 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42803 if (target != d->target)
42804 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42806 return true;
42809 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42810 in a single instruction. */
42812 static bool
42813 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42815 unsigned i, nelt = d->nelt;
42816 unsigned char perm2[MAX_VECT_LEN];
42818 /* Check plain VEC_SELECT first, because AVX has instructions that could
42819 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42820 input where SEL+CONCAT may not. */
42821 if (d->one_operand_p)
42823 int mask = nelt - 1;
42824 bool identity_perm = true;
42825 bool broadcast_perm = true;
42827 for (i = 0; i < nelt; i++)
42829 perm2[i] = d->perm[i] & mask;
42830 if (perm2[i] != i)
42831 identity_perm = false;
42832 if (perm2[i])
42833 broadcast_perm = false;
42836 if (identity_perm)
42838 if (!d->testing_p)
42839 emit_move_insn (d->target, d->op0);
42840 return true;
42842 else if (broadcast_perm && TARGET_AVX2)
42844 /* Use vpbroadcast{b,w,d}. */
42845 rtx (*gen) (rtx, rtx) = NULL;
42846 switch (d->vmode)
42848 case V32QImode:
42849 gen = gen_avx2_pbroadcastv32qi_1;
42850 break;
42851 case V16HImode:
42852 gen = gen_avx2_pbroadcastv16hi_1;
42853 break;
42854 case V8SImode:
42855 gen = gen_avx2_pbroadcastv8si_1;
42856 break;
42857 case V16QImode:
42858 gen = gen_avx2_pbroadcastv16qi;
42859 break;
42860 case V8HImode:
42861 gen = gen_avx2_pbroadcastv8hi;
42862 break;
42863 case V8SFmode:
42864 gen = gen_avx2_vec_dupv8sf_1;
42865 break;
42866 /* For other modes, prefer the other shuffles this function creates. */
42867 default: break;
42869 if (gen != NULL)
42871 if (!d->testing_p)
42872 emit_insn (gen (d->target, d->op0));
42873 return true;
42877 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42878 return true;
42880 /* There are plenty of patterns in sse.md that are written for
42881 SEL+CONCAT and are not replicated for a single op. Perhaps
42882 that should be changed, to avoid the nastiness here. */
42884 /* Recognize interleave style patterns, which means incrementing
42885 every other permutation operand. */
42886 for (i = 0; i < nelt; i += 2)
42888 perm2[i] = d->perm[i] & mask;
42889 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42891 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42892 d->testing_p))
42893 return true;
42895 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42896 if (nelt >= 4)
42898 for (i = 0; i < nelt; i += 4)
42900 perm2[i + 0] = d->perm[i + 0] & mask;
42901 perm2[i + 1] = d->perm[i + 1] & mask;
42902 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
42903 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
42906 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42907 d->testing_p))
42908 return true;
42912 /* Finally, try the fully general two operand permute. */
42913 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
42914 d->testing_p))
42915 return true;
42917 /* Recognize interleave style patterns with reversed operands. */
42918 if (!d->one_operand_p)
42920 for (i = 0; i < nelt; ++i)
42922 unsigned e = d->perm[i];
42923 if (e >= nelt)
42924 e -= nelt;
42925 else
42926 e += nelt;
42927 perm2[i] = e;
42930 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
42931 d->testing_p))
42932 return true;
42935 /* Try the SSE4.1 blend variable merge instructions. */
42936 if (expand_vec_perm_blend (d))
42937 return true;
42939 /* Try one of the AVX vpermil variable permutations. */
42940 if (expand_vec_perm_vpermil (d))
42941 return true;
42943 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
42944 vpshufb, vpermd, vpermps or vpermq variable permutation. */
42945 if (expand_vec_perm_pshufb (d))
42946 return true;
42948 /* Try the AVX512F vpermi2 instructions. */
42949 rtx vec[64];
42950 enum machine_mode mode = d->vmode;
42951 if (mode == V8DFmode)
42952 mode = V8DImode;
42953 else if (mode == V16SFmode)
42954 mode = V16SImode;
42955 for (i = 0; i < nelt; ++i)
42956 vec[i] = GEN_INT (d->perm[i]);
42957 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
42958 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
42959 return true;
42961 return false;
42964 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42965 in terms of a pair of pshuflw + pshufhw instructions. */
42967 static bool
42968 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
42970 unsigned char perm2[MAX_VECT_LEN];
42971 unsigned i;
42972 bool ok;
42974 if (d->vmode != V8HImode || !d->one_operand_p)
42975 return false;
42977 /* The two permutations only operate in 64-bit lanes. */
42978 for (i = 0; i < 4; ++i)
42979 if (d->perm[i] >= 4)
42980 return false;
42981 for (i = 4; i < 8; ++i)
42982 if (d->perm[i] < 4)
42983 return false;
42985 if (d->testing_p)
42986 return true;
42988 /* Emit the pshuflw. */
42989 memcpy (perm2, d->perm, 4);
42990 for (i = 4; i < 8; ++i)
42991 perm2[i] = i;
42992 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
42993 gcc_assert (ok);
42995 /* Emit the pshufhw. */
42996 memcpy (perm2 + 4, d->perm + 4, 4);
42997 for (i = 0; i < 4; ++i)
42998 perm2[i] = i;
42999 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43000 gcc_assert (ok);
43002 return true;
43005 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43006 the permutation using the SSSE3 palignr instruction. This succeeds
43007 when all of the elements in PERM fit within one vector and we merely
43008 need to shift them down so that a single vector permutation has a
43009 chance to succeed. */
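/* For example, a two-operand V8HImode permutation
{ 3, 4, 5, 6, 7, 8, 9, 10 } has min == 3 and max - min == 7 < 8, so
palignr shifts the concatenated operands down by three elements and
the remaining single-operand permutation is the identity. */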
43011 static bool
43012 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43014 unsigned i, nelt = d->nelt;
43015 unsigned min, max;
43016 bool in_order, ok;
43017 rtx shift, target;
43018 struct expand_vec_perm_d dcopy;
43020 /* Even with AVX, palignr only operates on 128-bit vectors. */
43021 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43022 return false;
43024 min = nelt, max = 0;
43025 for (i = 0; i < nelt; ++i)
43027 unsigned e = d->perm[i];
43028 if (e < min)
43029 min = e;
43030 if (e > max)
43031 max = e;
43033 if (min == 0 || max - min >= nelt)
43034 return false;
43036 /* Given that we have SSSE3, we know we'll be able to implement the
43037 single operand permutation after the palignr with pshufb. */
43038 if (d->testing_p)
43039 return true;
43041 dcopy = *d;
43042 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43043 target = gen_reg_rtx (TImode);
43044 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43045 gen_lowpart (TImode, d->op0), shift));
43047 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43048 dcopy.one_operand_p = true;
43050 in_order = true;
43051 for (i = 0; i < nelt; ++i)
43053 unsigned e = dcopy.perm[i] - min;
43054 if (e != i)
43055 in_order = false;
43056 dcopy.perm[i] = e;
43059 /* Test for the degenerate case where the alignment by itself
43060 produces the desired permutation. */
43061 if (in_order)
43063 emit_move_insn (d->target, dcopy.op0);
43064 return true;
43067 ok = expand_vec_perm_1 (&dcopy);
43068 gcc_assert (ok);
43070 return ok;
43073 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43075 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43076 a two vector permutation into a single vector permutation by using
43077 an interleave operation to merge the vectors. */
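/* For example, the V4SImode permutation { 1, 5, 0, 4 } only uses the
low halves of both operands, so a punpckldq first produces
{ a0, b0, a1, b1 } and a single-operand { 2, 3, 0, 1 } shuffle of
that result yields { a1, b1, a0, b0 }, the desired order. */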
43079 static bool
43080 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43082 struct expand_vec_perm_d dremap, dfinal;
43083 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43084 unsigned HOST_WIDE_INT contents;
43085 unsigned char remap[2 * MAX_VECT_LEN];
43086 rtx seq;
43087 bool ok, same_halves = false;
43089 if (GET_MODE_SIZE (d->vmode) == 16)
43091 if (d->one_operand_p)
43092 return false;
43094 else if (GET_MODE_SIZE (d->vmode) == 32)
43096 if (!TARGET_AVX)
43097 return false;
43098 /* For 32-byte modes allow even d->one_operand_p.
43099 The lack of cross-lane shuffling in some instructions
43100 might prevent a single insn shuffle. */
43101 dfinal = *d;
43102 dfinal.testing_p = true;
43103 /* If expand_vec_perm_interleave3 can expand this into
43104 a 3 insn sequence, give up and let it be expanded as
43105 a 3 insn sequence instead. While that is one insn longer,
43106 it doesn't need a memory operand, and in the common
43107 case where both the interleave low and interleave high
43108 permutations with the same operands are adjacent, it needs
43109 only 4 insns for both after CSE. */
43110 if (expand_vec_perm_interleave3 (&dfinal))
43111 return false;
43113 else
43114 return false;
43116 /* Examine from whence the elements come. */
43117 contents = 0;
43118 for (i = 0; i < nelt; ++i)
43119 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43121 memset (remap, 0xff, sizeof (remap));
43122 dremap = *d;
43124 if (GET_MODE_SIZE (d->vmode) == 16)
43126 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43128 /* Split the two input vectors into 4 halves. */
43129 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43130 h2 = h1 << nelt2;
43131 h3 = h2 << nelt2;
43132 h4 = h3 << nelt2;
43134 /* If the elements all come from the low halves, use interleave low;
43135 similarly for interleave high. If the elements are from mis-matched
43136 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43137 if ((contents & (h1 | h3)) == contents)
43139 /* punpckl* */
43140 for (i = 0; i < nelt2; ++i)
43142 remap[i] = i * 2;
43143 remap[i + nelt] = i * 2 + 1;
43144 dremap.perm[i * 2] = i;
43145 dremap.perm[i * 2 + 1] = i + nelt;
43147 if (!TARGET_SSE2 && d->vmode == V4SImode)
43148 dremap.vmode = V4SFmode;
43150 else if ((contents & (h2 | h4)) == contents)
43152 /* punpckh* */
43153 for (i = 0; i < nelt2; ++i)
43155 remap[i + nelt2] = i * 2;
43156 remap[i + nelt + nelt2] = i * 2 + 1;
43157 dremap.perm[i * 2] = i + nelt2;
43158 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43160 if (!TARGET_SSE2 && d->vmode == V4SImode)
43161 dremap.vmode = V4SFmode;
43163 else if ((contents & (h1 | h4)) == contents)
43165 /* shufps */
43166 for (i = 0; i < nelt2; ++i)
43168 remap[i] = i;
43169 remap[i + nelt + nelt2] = i + nelt2;
43170 dremap.perm[i] = i;
43171 dremap.perm[i + nelt2] = i + nelt + nelt2;
43173 if (nelt != 4)
43175 /* shufpd */
43176 dremap.vmode = V2DImode;
43177 dremap.nelt = 2;
43178 dremap.perm[0] = 0;
43179 dremap.perm[1] = 3;
43182 else if ((contents & (h2 | h3)) == contents)
43184 /* shufps */
43185 for (i = 0; i < nelt2; ++i)
43187 remap[i + nelt2] = i;
43188 remap[i + nelt] = i + nelt2;
43189 dremap.perm[i] = i + nelt2;
43190 dremap.perm[i + nelt2] = i + nelt;
43192 if (nelt != 4)
43194 /* shufpd */
43195 dremap.vmode = V2DImode;
43196 dremap.nelt = 2;
43197 dremap.perm[0] = 1;
43198 dremap.perm[1] = 2;
43201 else
43202 return false;
43204 else
43206 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43207 unsigned HOST_WIDE_INT q[8];
43208 unsigned int nonzero_halves[4];
43210 /* Split the two input vectors into 8 quarters. */
43211 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43212 for (i = 1; i < 8; ++i)
43213 q[i] = q[0] << (nelt4 * i);
43214 for (i = 0; i < 4; ++i)
43215 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43217 nonzero_halves[nzcnt] = i;
43218 ++nzcnt;
43221 if (nzcnt == 1)
43223 gcc_assert (d->one_operand_p);
43224 nonzero_halves[1] = nonzero_halves[0];
43225 same_halves = true;
43227 else if (d->one_operand_p)
43229 gcc_assert (nonzero_halves[0] == 0);
43230 gcc_assert (nonzero_halves[1] == 1);
43233 if (nzcnt <= 2)
43235 if (d->perm[0] / nelt2 == nonzero_halves[1])
43237 /* Attempt to increase the likelihood that dfinal
43238 shuffle will be intra-lane. */
43239 char tmph = nonzero_halves[0];
43240 nonzero_halves[0] = nonzero_halves[1];
43241 nonzero_halves[1] = tmph;
43244 /* vperm2f128 or vperm2i128. */
43245 for (i = 0; i < nelt2; ++i)
43247 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43248 remap[i + nonzero_halves[0] * nelt2] = i;
43249 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43250 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43253 if (d->vmode != V8SFmode
43254 && d->vmode != V4DFmode
43255 && d->vmode != V8SImode)
43257 dremap.vmode = V8SImode;
43258 dremap.nelt = 8;
43259 for (i = 0; i < 4; ++i)
43261 dremap.perm[i] = i + nonzero_halves[0] * 4;
43262 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43266 else if (d->one_operand_p)
43267 return false;
43268 else if (TARGET_AVX2
43269 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43271 /* vpunpckl* */
43272 for (i = 0; i < nelt4; ++i)
43274 remap[i] = i * 2;
43275 remap[i + nelt] = i * 2 + 1;
43276 remap[i + nelt2] = i * 2 + nelt2;
43277 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43278 dremap.perm[i * 2] = i;
43279 dremap.perm[i * 2 + 1] = i + nelt;
43280 dremap.perm[i * 2 + nelt2] = i + nelt2;
43281 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43284 else if (TARGET_AVX2
43285 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43287 /* vpunpckh* */
43288 for (i = 0; i < nelt4; ++i)
43290 remap[i + nelt4] = i * 2;
43291 remap[i + nelt + nelt4] = i * 2 + 1;
43292 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43293 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43294 dremap.perm[i * 2] = i + nelt4;
43295 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43296 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43297 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43300 else
43301 return false;
43304 /* Use the remapping array set up above to move the elements from their
43305 swizzled locations into their final destinations. */
43306 dfinal = *d;
43307 for (i = 0; i < nelt; ++i)
43309 unsigned e = remap[d->perm[i]];
43310 gcc_assert (e < nelt);
43311 /* If same_halves is true, both halves of the remapped vector are the
43312 same. Avoid cross-lane accesses if possible. */
43313 if (same_halves && i >= nelt2)
43315 gcc_assert (e < nelt2);
43316 dfinal.perm[i] = e + nelt2;
43318 else
43319 dfinal.perm[i] = e;
43321 if (!d->testing_p)
43323 dremap.target = gen_reg_rtx (dremap.vmode);
43324 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43326 dfinal.op1 = dfinal.op0;
43327 dfinal.one_operand_p = true;
43329 /* Test if the final remap can be done with a single insn. For V4SFmode or
43330 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43331 start_sequence ();
43332 ok = expand_vec_perm_1 (&dfinal);
43333 seq = get_insns ();
43334 end_sequence ();
43336 if (!ok)
43337 return false;
43339 if (d->testing_p)
43340 return true;
43342 if (dremap.vmode != dfinal.vmode)
43344 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43345 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43348 ok = expand_vec_perm_1 (&dremap);
43349 gcc_assert (ok);
43351 emit_insn (seq);
43352 return true;
43355 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43356 a single vector cross-lane permutation into vpermq followed
43357 by any of the single insn permutations. */
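/* contents[0] and contents[1] below record which 64-bit quarters of
the input feed the low and high halves of the result. As long as
each half draws from at most two quarters, a vpermq can gather
those quarters into the right lanes and the remaining shuffle in
dfinal stays within a lane. */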
43359 static bool
43360 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43362 struct expand_vec_perm_d dremap, dfinal;
43363 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43364 unsigned contents[2];
43365 bool ok;
43367 if (!(TARGET_AVX2
43368 && (d->vmode == V32QImode || d->vmode == V16HImode)
43369 && d->one_operand_p))
43370 return false;
43372 contents[0] = 0;
43373 contents[1] = 0;
43374 for (i = 0; i < nelt2; ++i)
43376 contents[0] |= 1u << (d->perm[i] / nelt4);
43377 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43380 for (i = 0; i < 2; ++i)
43382 unsigned int cnt = 0;
43383 for (j = 0; j < 4; ++j)
43384 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43385 return false;
43388 if (d->testing_p)
43389 return true;
43391 dremap = *d;
43392 dremap.vmode = V4DImode;
43393 dremap.nelt = 4;
43394 dremap.target = gen_reg_rtx (V4DImode);
43395 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43396 dremap.op1 = dremap.op0;
43397 dremap.one_operand_p = true;
43398 for (i = 0; i < 2; ++i)
43400 unsigned int cnt = 0;
43401 for (j = 0; j < 4; ++j)
43402 if ((contents[i] & (1u << j)) != 0)
43403 dremap.perm[2 * i + cnt++] = j;
43404 for (; cnt < 2; ++cnt)
43405 dremap.perm[2 * i + cnt] = 0;
43408 dfinal = *d;
43409 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43410 dfinal.op1 = dfinal.op0;
43411 dfinal.one_operand_p = true;
43412 for (i = 0, j = 0; i < nelt; ++i)
43414 if (i == nelt2)
43415 j = 2;
43416 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43417 if ((d->perm[i] / nelt4) == dremap.perm[j])
43419 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43420 dfinal.perm[i] |= nelt4;
43421 else
43422 gcc_unreachable ();
43425 ok = expand_vec_perm_1 (&dremap);
43426 gcc_assert (ok);
43428 ok = expand_vec_perm_1 (&dfinal);
43429 gcc_assert (ok);
43431 return true;
43434 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43435 a vector permutation using two instructions, vperm2f128 resp.
43436 vperm2i128 followed by any single in-lane permutation. */
43438 static bool
43439 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43441 struct expand_vec_perm_d dfirst, dsecond;
43442 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43443 bool ok;
43445 if (!TARGET_AVX
43446 || GET_MODE_SIZE (d->vmode) != 32
43447 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43448 return false;
43450 dsecond = *d;
43451 dsecond.one_operand_p = false;
43452 dsecond.testing_p = true;
43454 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43455 immediate. For perm < 16 the second permutation uses
43456 d->op0 as first operand, for perm >= 16 it uses d->op1
43457 as first operand. The second operand is the result of
43458 vperm2[fi]128. */
43459 for (perm = 0; perm < 32; perm++)
43461 /* Ignore permutations which do not move anything cross-lane. */
43462 if (perm < 16)
43464 /* The second shuffle for e.g. V4DFmode has
43465 0123 and ABCD operands.
43466 Ignore AB23, as 23 is already in the second lane
43467 of the first operand. */
43468 if ((perm & 0xc) == (1 << 2)) continue;
43469 /* And 01CD, as 01 is in the first lane of the first
43470 operand. */
43471 if ((perm & 3) == 0) continue;
43472 /* And 4567, as then the vperm2[fi]128 doesn't change
43473 anything on the original 4567 second operand. */
43474 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43476 else
43478 /* The second shuffle for e.g. V4DFmode has
43479 4567 and ABCD operands.
43480 Ignore AB67, as 67 is already in the second lane
43481 of the first operand. */
43482 if ((perm & 0xc) == (3 << 2)) continue;
43483 /* And 45CD, as 45 is in the first lane of the first
43484 operand. */
43485 if ((perm & 3) == 2) continue;
43486 /* And 0123, as then the vperm2[fi]128 doesn't change
43487 anything on the original 0123 first operand. */
43488 if ((perm & 0xf) == (1 << 2)) continue;
43491 for (i = 0; i < nelt; i++)
43493 j = d->perm[i] / nelt2;
43494 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43495 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43496 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43497 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43498 else
43499 break;
43502 if (i == nelt)
43504 start_sequence ();
43505 ok = expand_vec_perm_1 (&dsecond);
43506 end_sequence ();
43508 else
43509 ok = false;
43511 if (ok)
43513 if (d->testing_p)
43514 return true;
43516 /* Found a usable second shuffle. dfirst will be
43517 vperm2f128 on d->op0 and d->op1. */
43518 dsecond.testing_p = false;
43519 dfirst = *d;
43520 dfirst.target = gen_reg_rtx (d->vmode);
43521 for (i = 0; i < nelt; i++)
43522 dfirst.perm[i] = (i & (nelt2 - 1))
43523 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43525 ok = expand_vec_perm_1 (&dfirst);
43526 gcc_assert (ok);
43528 /* And dsecond is some single insn shuffle, taking
43529 d->op0 and result of vperm2f128 (if perm < 16) or
43530 d->op1 and result of vperm2f128 (otherwise). */
43531 dsecond.op1 = dfirst.target;
43532 if (perm >= 16)
43533 dsecond.op0 = dfirst.op1;
43535 ok = expand_vec_perm_1 (&dsecond);
43536 gcc_assert (ok);
43538 return true;
43541 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43542 if (d->one_operand_p)
43543 return false;
43546 return false;
43549 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43550 a two vector permutation using 2 intra-lane interleave insns
43551 and cross-lane shuffle for 32-byte vectors. */
43553 static bool
43554 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43556 unsigned i, nelt;
43557 rtx (*gen) (rtx, rtx, rtx);
43559 if (d->one_operand_p)
43560 return false;
43561 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43563 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43565 else
43566 return false;
43568 nelt = d->nelt;
43569 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43570 return false;
43571 for (i = 0; i < nelt; i += 2)
43572 if (d->perm[i] != d->perm[0] + i / 2
43573 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43574 return false;
43576 if (d->testing_p)
43577 return true;
43579 switch (d->vmode)
43581 case V32QImode:
43582 if (d->perm[0])
43583 gen = gen_vec_interleave_highv32qi;
43584 else
43585 gen = gen_vec_interleave_lowv32qi;
43586 break;
43587 case V16HImode:
43588 if (d->perm[0])
43589 gen = gen_vec_interleave_highv16hi;
43590 else
43591 gen = gen_vec_interleave_lowv16hi;
43592 break;
43593 case V8SImode:
43594 if (d->perm[0])
43595 gen = gen_vec_interleave_highv8si;
43596 else
43597 gen = gen_vec_interleave_lowv8si;
43598 break;
43599 case V4DImode:
43600 if (d->perm[0])
43601 gen = gen_vec_interleave_highv4di;
43602 else
43603 gen = gen_vec_interleave_lowv4di;
43604 break;
43605 case V8SFmode:
43606 if (d->perm[0])
43607 gen = gen_vec_interleave_highv8sf;
43608 else
43609 gen = gen_vec_interleave_lowv8sf;
43610 break;
43611 case V4DFmode:
43612 if (d->perm[0])
43613 gen = gen_vec_interleave_highv4df;
43614 else
43615 gen = gen_vec_interleave_lowv4df;
43616 break;
43617 default:
43618 gcc_unreachable ();
43621 emit_insn (gen (d->target, d->op0, d->op1));
43622 return true;
43625 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43626 a single vector permutation using a single intra-lane vector
43627 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43628 the non-swapped and swapped vectors together. */
43630 static bool
43631 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43633 struct expand_vec_perm_d dfirst, dsecond;
43634 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43635 rtx seq;
43636 bool ok;
43637 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43639 if (!TARGET_AVX
43640 || TARGET_AVX2
43641 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43642 || !d->one_operand_p)
43643 return false;
43645 dfirst = *d;
43646 for (i = 0; i < nelt; i++)
43647 dfirst.perm[i] = 0xff;
43648 for (i = 0, msk = 0; i < nelt; i++)
43650 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43651 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43652 return false;
43653 dfirst.perm[j] = d->perm[i];
43654 if (j != i)
43655 msk |= (1 << i);
43657 for (i = 0; i < nelt; i++)
43658 if (dfirst.perm[i] == 0xff)
43659 dfirst.perm[i] = i;
43661 if (!d->testing_p)
43662 dfirst.target = gen_reg_rtx (dfirst.vmode);
43664 start_sequence ();
43665 ok = expand_vec_perm_1 (&dfirst);
43666 seq = get_insns ();
43667 end_sequence ();
43669 if (!ok)
43670 return false;
43672 if (d->testing_p)
43673 return true;
43675 emit_insn (seq);
43677 dsecond = *d;
43678 dsecond.op0 = dfirst.target;
43679 dsecond.op1 = dfirst.target;
43680 dsecond.one_operand_p = true;
43681 dsecond.target = gen_reg_rtx (dsecond.vmode);
43682 for (i = 0; i < nelt; i++)
43683 dsecond.perm[i] = i ^ nelt2;
43685 ok = expand_vec_perm_1 (&dsecond);
43686 gcc_assert (ok);
43688 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43689 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43690 return true;
43693 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43694 permutation using two vperm2f128, followed by a vshufpd insn blending
43695 the two vectors together. */
43697 static bool
43698 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43700 struct expand_vec_perm_d dfirst, dsecond, dthird;
43701 bool ok;
43703 if (!TARGET_AVX || (d->vmode != V4DFmode))
43704 return false;
43706 if (d->testing_p)
43707 return true;
43709 dfirst = *d;
43710 dsecond = *d;
43711 dthird = *d;
43713 dfirst.perm[0] = (d->perm[0] & ~1);
43714 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43715 dfirst.perm[2] = (d->perm[2] & ~1);
43716 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43717 dsecond.perm[0] = (d->perm[1] & ~1);
43718 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43719 dsecond.perm[2] = (d->perm[3] & ~1);
43720 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43721 dthird.perm[0] = (d->perm[0] % 2);
43722 dthird.perm[1] = (d->perm[1] % 2) + 4;
43723 dthird.perm[2] = (d->perm[2] % 2) + 2;
43724 dthird.perm[3] = (d->perm[3] % 2) + 6;
43726 dfirst.target = gen_reg_rtx (dfirst.vmode);
43727 dsecond.target = gen_reg_rtx (dsecond.vmode);
43728 dthird.op0 = dfirst.target;
43729 dthird.op1 = dsecond.target;
43730 dthird.one_operand_p = false;
43732 canonicalize_perm (&dfirst);
43733 canonicalize_perm (&dsecond);
43735 ok = expand_vec_perm_1 (&dfirst)
43736 && expand_vec_perm_1 (&dsecond)
43737 && expand_vec_perm_1 (&dthird);
43739 gcc_assert (ok);
43741 return true;
43744 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43745 permutation with two pshufb insns and an ior. We should have already
43746 failed all two instruction sequences. */
43748 static bool
43749 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43751 rtx rperm[2][16], vperm, l, h, op, m128;
43752 unsigned int i, nelt, eltsz;
43754 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43755 return false;
43756 gcc_assert (!d->one_operand_p);
43758 if (d->testing_p)
43759 return true;
43761 nelt = d->nelt;
43762 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43764 /* Generate two permutation masks. If the required element is within
43765 the given vector it is shuffled into the proper lane. If the required
43766 element is in the other vector, force a zero into the lane by setting
43767 bit 7 in the permutation mask. */
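/* For an extract-even V16QImode permutation { 0, 2, ..., 30 } the
first mask is { 0, 2, ..., 14, -128 x 8 } and the second is
{ -128 x 8, 0, 2, ..., 14 }, so each pshufb contributes half of the
result and the ior merges them. */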
43768 m128 = GEN_INT (-128);
43769 for (i = 0; i < nelt; ++i)
43771 unsigned j, e = d->perm[i];
43772 unsigned which = (e >= nelt);
43773 if (e >= nelt)
43774 e -= nelt;
43776 for (j = 0; j < eltsz; ++j)
43778 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43779 rperm[1-which][i*eltsz + j] = m128;
43783 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43784 vperm = force_reg (V16QImode, vperm);
43786 l = gen_reg_rtx (V16QImode);
43787 op = gen_lowpart (V16QImode, d->op0);
43788 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43790 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43791 vperm = force_reg (V16QImode, vperm);
43793 h = gen_reg_rtx (V16QImode);
43794 op = gen_lowpart (V16QImode, d->op1);
43795 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43797 op = d->target;
43798 if (d->vmode != V16QImode)
43799 op = gen_reg_rtx (V16QImode);
43800 emit_insn (gen_iorv16qi3 (op, l, h));
43801 if (op != d->target)
43802 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43804 return true;
43807 /* Implement arbitrary permutation of a single V32QImode or V16HImode operand
43808 with two vpshufb insns, vpermq and vpor. We should have already failed
43809 all two or three instruction sequences. */
43811 static bool
43812 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43814 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43815 unsigned int i, nelt, eltsz;
43817 if (!TARGET_AVX2
43818 || !d->one_operand_p
43819 || (d->vmode != V32QImode && d->vmode != V16HImode))
43820 return false;
43822 if (d->testing_p)
43823 return true;
43825 nelt = d->nelt;
43826 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43828 /* Generate two permutation masks. If the required element is within
43829 the same lane, it is shuffled in. If the required element is from the
43830 other lane, force a zero by setting bit 7 in the permutation mask.
43831 In the other mask, an element requested from the other lane gets a
43832 non-negative index, but it is also moved to the other lane, so that
43833 the result of vpshufb can have its two V2TImode halves
43834 swapped. */
43835 m128 = GEN_INT (-128);
43836 for (i = 0; i < nelt; ++i)
43838 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43839 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43841 for (j = 0; j < eltsz; ++j)
43843 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43844 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43848 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43849 vperm = force_reg (V32QImode, vperm);
43851 h = gen_reg_rtx (V32QImode);
43852 op = gen_lowpart (V32QImode, d->op0);
43853 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43855 /* Swap the 128-bit lanes of h into hp. */
43856 hp = gen_reg_rtx (V4DImode);
43857 op = gen_lowpart (V4DImode, h);
43858 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43859 const1_rtx));
43861 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43862 vperm = force_reg (V32QImode, vperm);
43864 l = gen_reg_rtx (V32QImode);
43865 op = gen_lowpart (V32QImode, d->op0);
43866 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43868 op = d->target;
43869 if (d->vmode != V32QImode)
43870 op = gen_reg_rtx (V32QImode);
43871 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43872 if (op != d->target)
43873 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43875 return true;
43878 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43879 and extract-odd permutations of two V32QImode or V16HImode operands
43880 with two vpshufb insns, vpor and vpermq. We should have already
43881 failed all two or three instruction sequences. */
43883 static bool
43884 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43886 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43887 unsigned int i, nelt, eltsz;
43889 if (!TARGET_AVX2
43890 || d->one_operand_p
43891 || (d->vmode != V32QImode && d->vmode != V16HImode))
43892 return false;
43894 for (i = 0; i < d->nelt; ++i)
43895 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43896 return false;
43898 if (d->testing_p)
43899 return true;
43901 nelt = d->nelt;
43902 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43904 /* Generate two permutation masks. In the first permutation mask
43905 the first quarter will contain indexes for the first half
43906 of the op0, the second quarter will contain bit 7 set, third quarter
43907 will contain indexes for the second half of the op0 and the
43908 last quarter bit 7 set. In the second permutation mask
43909 the first quarter will contain bit 7 set, the second quarter
43910 indexes for the first half of the op1, the third quarter bit 7 set
43911 and last quarter indexes for the second half of the op1.
43912 I.e. the first mask e.g. for V32QImode extract even will be:
43913 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
43914 (all values masked with 0xf except for -128) and second mask
43915 for extract even will be
43916 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
43917 m128 = GEN_INT (-128);
43918 for (i = 0; i < nelt; ++i)
43920 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43921 unsigned which = d->perm[i] >= nelt;
43922 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
43924 for (j = 0; j < eltsz; ++j)
43926 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
43927 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
43931 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43932 vperm = force_reg (V32QImode, vperm);
43934 l = gen_reg_rtx (V32QImode);
43935 op = gen_lowpart (V32QImode, d->op0);
43936 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43938 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43939 vperm = force_reg (V32QImode, vperm);
43941 h = gen_reg_rtx (V32QImode);
43942 op = gen_lowpart (V32QImode, d->op1);
43943 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43945 ior = gen_reg_rtx (V32QImode);
43946 emit_insn (gen_iorv32qi3 (ior, l, h));
43948 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
43949 op = gen_reg_rtx (V4DImode);
43950 ior = gen_lowpart (V4DImode, ior);
43951 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
43952 const1_rtx, GEN_INT (3)));
43953 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43955 return true;
43958 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
43959 and extract-odd permutations. */
43961 static bool
43962 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
43964 rtx t1, t2, t3, t4, t5;
43966 switch (d->vmode)
43968 case V4DFmode:
43969 if (d->testing_p)
43970 break;
43971 t1 = gen_reg_rtx (V4DFmode);
43972 t2 = gen_reg_rtx (V4DFmode);
43974 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
43975 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
43976 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
43978 /* Now an unpck[lh]pd will produce the result required. */
43979 if (odd)
43980 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
43981 else
43982 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
43983 emit_insn (t3);
43984 break;
43986 case V8SFmode:
43988 int mask = odd ? 0xdd : 0x88;
43990 if (d->testing_p)
43991 break;
43992 t1 = gen_reg_rtx (V8SFmode);
43993 t2 = gen_reg_rtx (V8SFmode);
43994 t3 = gen_reg_rtx (V8SFmode);
43996 /* Shuffle within the 128-bit lanes to produce:
43997 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
43998 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
43999 GEN_INT (mask)));
44001 /* Shuffle the lanes around to produce:
44002 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44003 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44004 GEN_INT (0x3)));
44006 /* Shuffle within the 128-bit lanes to produce:
44007 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44008 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44010 /* Shuffle within the 128-bit lanes to produce:
44011 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44012 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44014 /* Shuffle the lanes around to produce:
44015 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44016 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44017 GEN_INT (0x20)));
44019 break;
44021 case V2DFmode:
44022 case V4SFmode:
44023 case V2DImode:
44024 case V4SImode:
44025 /* These are always directly implementable by expand_vec_perm_1. */
44026 gcc_unreachable ();
44028 case V8HImode:
44029 if (TARGET_SSSE3)
44030 return expand_vec_perm_pshufb2 (d);
44031 else
44033 if (d->testing_p)
44034 break;
44035 /* We need 2*log2(N)-1 operations to achieve odd/even
44036 with interleave. */
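/* For extract-even from { a0 ... a7 } and { b0 ... b7 }:
the first high/low interleaves give { a4 b4 a5 b5 a6 b6 a7 b7 } and
{ a0 b0 a1 b1 a2 b2 a3 b3 }; the second round gives
{ a2 a6 b2 b6 a3 a7 b3 b7 } and { a0 a4 b0 b4 a1 a5 b1 b5 };
the final interleave low then produces { a0 a2 a4 a6 b0 b2 b4 b6 }. */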
44037 t1 = gen_reg_rtx (V8HImode);
44038 t2 = gen_reg_rtx (V8HImode);
44039 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44040 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44041 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44042 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44043 if (odd)
44044 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44045 else
44046 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44047 emit_insn (t3);
44049 break;
44051 case V16QImode:
44052 if (TARGET_SSSE3)
44053 return expand_vec_perm_pshufb2 (d);
44054 else
44056 if (d->testing_p)
44057 break;
44058 t1 = gen_reg_rtx (V16QImode);
44059 t2 = gen_reg_rtx (V16QImode);
44060 t3 = gen_reg_rtx (V16QImode);
44061 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44062 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44063 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44064 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44065 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44066 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44067 if (odd)
44068 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44069 else
44070 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44071 emit_insn (t3);
44073 break;
44075 case V16HImode:
44076 case V32QImode:
44077 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44079 case V4DImode:
44080 if (!TARGET_AVX2)
44082 struct expand_vec_perm_d d_copy = *d;
44083 d_copy.vmode = V4DFmode;
44084 if (d->testing_p)
44085 d_copy.target = gen_lowpart (V4DFmode, d->target);
44086 else
44087 d_copy.target = gen_reg_rtx (V4DFmode);
44088 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44089 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44090 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44092 if (!d->testing_p)
44093 emit_move_insn (d->target,
44094 gen_lowpart (V4DImode, d_copy.target));
44095 return true;
44097 return false;
44100 if (d->testing_p)
44101 break;
44103 t1 = gen_reg_rtx (V4DImode);
44104 t2 = gen_reg_rtx (V4DImode);
44106 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44107 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44108 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44110 /* Now a vpunpck[lh]qdq will produce the result required. */
44111 if (odd)
44112 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44113 else
44114 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44115 emit_insn (t3);
44116 break;
44118 case V8SImode:
44119 if (!TARGET_AVX2)
44121 struct expand_vec_perm_d d_copy = *d;
44122 d_copy.vmode = V8SFmode;
44123 if (d->testing_p)
44124 d_copy.target = gen_lowpart (V8SFmode, d->target);
44125 else
44126 d_copy.target = gen_reg_rtx (V8SFmode);
44127 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44128 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44129 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44131 if (!d->testing_p)
44132 emit_move_insn (d->target,
44133 gen_lowpart (V8SImode, d_copy.target));
44134 return true;
44136 return false;
44139 if (d->testing_p)
44140 break;
44142 t1 = gen_reg_rtx (V8SImode);
44143 t2 = gen_reg_rtx (V8SImode);
44144 t3 = gen_reg_rtx (V4DImode);
44145 t4 = gen_reg_rtx (V4DImode);
44146 t5 = gen_reg_rtx (V4DImode);
44148 /* Shuffle the lanes around into
44149 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44150 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44151 gen_lowpart (V4DImode, d->op1),
44152 GEN_INT (0x20)));
44153 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44154 gen_lowpart (V4DImode, d->op1),
44155 GEN_INT (0x31)));
44157 /* Swap the 2nd and 3rd position in each lane into
44158 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44159 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44160 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44161 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44162 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44164 /* Now a vpunpck[lh]qdq will produce
44165 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44166 if (odd)
44167 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44168 gen_lowpart (V4DImode, t2));
44169 else
44170 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44171 gen_lowpart (V4DImode, t2));
44172 emit_insn (t3);
44173 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44174 break;
44176 default:
44177 gcc_unreachable ();
44180 return true;
44183 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44184 extract-even and extract-odd permutations. */
44186 static bool
44187 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44189 unsigned i, odd, nelt = d->nelt;
44191 odd = d->perm[0];
44192 if (odd != 0 && odd != 1)
44193 return false;
44195 for (i = 1; i < nelt; ++i)
44196 if (d->perm[i] != 2 * i + odd)
44197 return false;
44199 return expand_vec_perm_even_odd_1 (d, odd);
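/* As an illustration (hypothetical values): for V4SImode with odd == 1
   the selector matched above is { 1, 3, 5, 7 }, i.e. the odd elements
   of the concatenation of the two input vectors. */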
44202 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44203 permutations. We assume that expand_vec_perm_1 has already failed. */
44205 static bool
44206 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44208 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44209 enum machine_mode vmode = d->vmode;
44210 unsigned char perm2[4];
44211 rtx op0 = d->op0, dest;
44212 bool ok;
44214 switch (vmode)
44216 case V4DFmode:
44217 case V8SFmode:
44218 /* These are special-cased in sse.md so that we can optionally
44219 use the vbroadcast instruction. They expand to two insns
44220 if the input happens to be in a register. */
44221 gcc_unreachable ();
44223 case V2DFmode:
44224 case V2DImode:
44225 case V4SFmode:
44226 case V4SImode:
44227 /* These are always implementable using standard shuffle patterns. */
44228 gcc_unreachable ();
44230 case V8HImode:
44231 case V16QImode:
44232 /* These can be implemented via interleave. We save one insn by
44233 stopping once we have promoted to V4SImode and then using pshufd. */
44234 if (d->testing_p)
44235 return true;
44238 rtx dest;
44239 rtx (*gen) (rtx, rtx, rtx)
44240 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44241 : gen_vec_interleave_lowv8hi;
44243 if (elt >= nelt2)
44245 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44246 : gen_vec_interleave_highv8hi;
44247 elt -= nelt2;
44249 nelt2 /= 2;
44251 dest = gen_reg_rtx (vmode);
44252 emit_insn (gen (dest, op0, op0));
44253 vmode = get_mode_wider_vector (vmode);
44254 op0 = gen_lowpart (vmode, dest);
44256 while (vmode != V4SImode);
44258 memset (perm2, elt, 4);
44259 dest = gen_reg_rtx (V4SImode);
44260 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44261 gcc_assert (ok);
44262 if (!d->testing_p)
44263 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44264 return true;
44266 case V32QImode:
44267 case V16HImode:
44268 case V8SImode:
44269 case V4DImode:
44270 /* For AVX2 broadcasts of the first element vpbroadcast* or
44271 vpermq should be used by expand_vec_perm_1. */
44272 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44273 return false;
44275 default:
44276 gcc_unreachable ();
44280 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44281 broadcast permutations. */
44283 static bool
44284 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44286 unsigned i, elt, nelt = d->nelt;
44288 if (!d->one_operand_p)
44289 return false;
44291 elt = d->perm[0];
44292 for (i = 1; i < nelt; ++i)
44293 if (d->perm[i] != elt)
44294 return false;
44296 return expand_vec_perm_broadcast_1 (d);
44299 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
44300 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44301 all the shorter instruction sequences. */
44303 static bool
44304 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44306 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44307 unsigned int i, nelt, eltsz;
44308 bool used[4];
44310 if (!TARGET_AVX2
44311 || d->one_operand_p
44312 || (d->vmode != V32QImode && d->vmode != V16HImode))
44313 return false;
44315 if (d->testing_p)
44316 return true;
44318 nelt = d->nelt;
44319 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44321 /* Generate 4 permutation masks. If the required element is within
44322 the same lane, it is shuffled in. If the required element is from the
44323 other lane, force a zero by setting bit 7 in the permutation mask.
44324 In the other mask the element is non-negative if it is requested from
44325 the other lane, but it is also moved to the other lane,
44326 so that the result of vpshufb can have the two V2TImode halves
44327 swapped. */
44328 m128 = GEN_INT (-128);
44329 for (i = 0; i < 32; ++i)
44331 rperm[0][i] = m128;
44332 rperm[1][i] = m128;
44333 rperm[2][i] = m128;
44334 rperm[3][i] = m128;
44336 used[0] = false;
44337 used[1] = false;
44338 used[2] = false;
44339 used[3] = false;
44340 for (i = 0; i < nelt; ++i)
44342 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44343 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44344 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44346 for (j = 0; j < eltsz; ++j)
44347 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44348 used[which] = true;
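/* That is: the (d->perm[i] & nelt) bit selects the source operand
   (masks 2 and 3 read op1, masks 0 and 1 read op0), and a nonzero
   XLANE means the element must cross a 128-bit lane, so it is placed
   in the odd-numbered mask whose vpshufb result gets lane-swapped
   below. */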
44351 for (i = 0; i < 2; ++i)
44353 if (!used[2 * i + 1])
44355 h[i] = NULL_RTX;
44356 continue;
44358 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44359 gen_rtvec_v (32, rperm[2 * i + 1]));
44360 vperm = force_reg (V32QImode, vperm);
44361 h[i] = gen_reg_rtx (V32QImode);
44362 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44363 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44366 /* Swap the 128-bit lanes of h[X]. */
44367 for (i = 0; i < 2; ++i)
44369 if (h[i] == NULL_RTX)
44370 continue;
44371 op = gen_reg_rtx (V4DImode);
44372 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44373 const2_rtx, GEN_INT (3), const0_rtx,
44374 const1_rtx));
44375 h[i] = gen_lowpart (V32QImode, op);
44378 for (i = 0; i < 2; ++i)
44380 if (!used[2 * i])
44382 l[i] = NULL_RTX;
44383 continue;
44385 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44386 vperm = force_reg (V32QImode, vperm);
44387 l[i] = gen_reg_rtx (V32QImode);
44388 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44389 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44392 for (i = 0; i < 2; ++i)
44394 if (h[i] && l[i])
44396 op = gen_reg_rtx (V32QImode);
44397 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44398 l[i] = op;
44400 else if (h[i])
44401 l[i] = h[i];
44404 gcc_assert (l[0] && l[1]);
44405 op = d->target;
44406 if (d->vmode != V32QImode)
44407 op = gen_reg_rtx (V32QImode);
44408 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44409 if (op != d->target)
44410 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44411 return true;
44414 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44415 With all of the interface bits taken care of, perform the expansion
44416 in D and return true on success. */
44418 static bool
44419 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44421 /* Try a single instruction expansion. */
44422 if (expand_vec_perm_1 (d))
44423 return true;
44425 /* Try sequences of two instructions. */
44427 if (expand_vec_perm_pshuflw_pshufhw (d))
44428 return true;
44430 if (expand_vec_perm_palignr (d))
44431 return true;
44433 if (expand_vec_perm_interleave2 (d))
44434 return true;
44436 if (expand_vec_perm_broadcast (d))
44437 return true;
44439 if (expand_vec_perm_vpermq_perm_1 (d))
44440 return true;
44442 if (expand_vec_perm_vperm2f128 (d))
44443 return true;
44445 /* Try sequences of three instructions. */
44447 if (expand_vec_perm_2vperm2f128_vshuf (d))
44448 return true;
44450 if (expand_vec_perm_pshufb2 (d))
44451 return true;
44453 if (expand_vec_perm_interleave3 (d))
44454 return true;
44456 if (expand_vec_perm_vperm2f128_vblend (d))
44457 return true;
44459 /* Try sequences of four instructions. */
44461 if (expand_vec_perm_vpshufb2_vpermq (d))
44462 return true;
44464 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44465 return true;
44467 /* ??? Look for narrow permutations whose element orderings would
44468 allow the promotion to a wider mode. */
44470 /* ??? Look for sequences of interleave or a wider permute that place
44471 the data into the correct lanes for a half-vector shuffle like
44472 pshuf[lh]w or vpermilps. */
44474 /* ??? Look for sequences of interleave that produce the desired results.
44475 The combinatorics of punpck[lh] get pretty ugly... */
44477 if (expand_vec_perm_even_odd (d))
44478 return true;
44480 /* Even longer sequences. */
44481 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44482 return true;
44484 return false;
44487 /* If a permutation only uses one operand, make it clear. Returns true
44488 if the permutation references both operands. */
44490 static bool
44491 canonicalize_perm (struct expand_vec_perm_d *d)
44493 int i, which, nelt = d->nelt;
44495 for (i = which = 0; i < nelt; ++i)
44496 which |= (d->perm[i] < nelt ? 1 : 2);
44498 d->one_operand_p = true;
44499 switch (which)
44501 default:
44502 gcc_unreachable();
44504 case 3:
44505 if (!rtx_equal_p (d->op0, d->op1))
44507 d->one_operand_p = false;
44508 break;
44510 /* The elements of PERM do not suggest that only the first operand
44511 is used, but both operands are identical. Allow easier matching
44512 of the permutation by folding the permutation into the single
44513 input vector. */
44514 /* FALLTHRU */
44516 case 2:
44517 for (i = 0; i < nelt; ++i)
44518 d->perm[i] &= nelt - 1;
44519 d->op0 = d->op1;
44520 break;
44522 case 1:
44523 d->op1 = d->op0;
44524 break;
44527 return (which == 3);
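/* For reference: WHICH accumulates bit 0 when an element of PERM
   indexes the first operand and bit 1 when it indexes the second, so
   the return value is true only when the original selector referenced
   both inputs. */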
44530 bool
44531 ix86_expand_vec_perm_const (rtx operands[4])
44533 struct expand_vec_perm_d d;
44534 unsigned char perm[MAX_VECT_LEN];
44535 int i, nelt;
44536 bool two_args;
44537 rtx sel;
44539 d.target = operands[0];
44540 d.op0 = operands[1];
44541 d.op1 = operands[2];
44542 sel = operands[3];
44544 d.vmode = GET_MODE (d.target);
44545 gcc_assert (VECTOR_MODE_P (d.vmode));
44546 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44547 d.testing_p = false;
44549 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44550 gcc_assert (XVECLEN (sel, 0) == nelt);
44551 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44553 for (i = 0; i < nelt; ++i)
44555 rtx e = XVECEXP (sel, 0, i);
44556 int ei = INTVAL (e) & (2 * nelt - 1);
44557 d.perm[i] = ei;
44558 perm[i] = ei;
44561 two_args = canonicalize_perm (&d);
44563 if (ix86_expand_vec_perm_const_1 (&d))
44564 return true;
44566 /* If the selector says both arguments are needed, but the operands are the
44567 same, the above tried to expand with one_operand_p and flattened selector.
44568 If that didn't work, retry without one_operand_p; we succeeded with that
44569 during testing. */
44570 if (two_args && d.one_operand_p)
44572 d.one_operand_p = false;
44573 memcpy (d.perm, perm, sizeof (perm));
44574 return ix86_expand_vec_perm_const_1 (&d);
44577 return false;
44580 /* Implement targetm.vectorize.vec_perm_const_ok. */
44582 static bool
44583 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44584 const unsigned char *sel)
44586 struct expand_vec_perm_d d;
44587 unsigned int i, nelt, which;
44588 bool ret;
44590 d.vmode = vmode;
44591 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44592 d.testing_p = true;
44594 /* Given sufficient ISA support we can just return true here
44595 for selected vector modes. */
44596 if (d.vmode == V16SImode || d.vmode == V16SFmode
44597 || d.vmode == V8DFmode || d.vmode == V8DImode)
44598 /* All implementable with a single vpermi2 insn. */
44599 return true;
44600 if (GET_MODE_SIZE (d.vmode) == 16)
44602 /* All implementable with a single vpperm insn. */
44603 if (TARGET_XOP)
44604 return true;
44605 /* All implementable with 2 pshufb + 1 ior. */
44606 if (TARGET_SSSE3)
44607 return true;
44608 /* All implementable with shufpd or unpck[lh]pd. */
44609 if (d.nelt == 2)
44610 return true;
44613 /* Extract the values from the vector CST into the permutation
44614 array in D. */
44615 memcpy (d.perm, sel, nelt);
44616 for (i = which = 0; i < nelt; ++i)
44618 unsigned char e = d.perm[i];
44619 gcc_assert (e < 2 * nelt);
44620 which |= (e < nelt ? 1 : 2);
44623 /* If all elements are from the second vector, fold them into the first. */
44624 if (which == 2)
44625 for (i = 0; i < nelt; ++i)
44626 d.perm[i] -= nelt;
44628 /* Check whether the mask can be applied to the vector type. */
44629 d.one_operand_p = (which != 3);
44631 /* Implementable with shufps or pshufd. */
44632 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44633 return true;
44635 /* Otherwise we have to go through the motions and see if we can
44636 figure out how to generate the requested permutation. */
44637 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44638 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44639 if (!d.one_operand_p)
44640 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44642 start_sequence ();
44643 ret = ix86_expand_vec_perm_const_1 (&d);
44644 end_sequence ();
44646 return ret;
44649 void
44650 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44652 struct expand_vec_perm_d d;
44653 unsigned i, nelt;
44655 d.target = targ;
44656 d.op0 = op0;
44657 d.op1 = op1;
44658 d.vmode = GET_MODE (targ);
44659 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44660 d.one_operand_p = false;
44661 d.testing_p = false;
44663 for (i = 0; i < nelt; ++i)
44664 d.perm[i] = i * 2 + odd;
44666 /* We'll either be able to implement the permutation directly... */
44667 if (expand_vec_perm_1 (&d))
44668 return;
44670 /* ... or we use the special-case patterns. */
44671 expand_vec_perm_even_odd_1 (&d, odd);
44674 static void
44675 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44677 struct expand_vec_perm_d d;
44678 unsigned i, nelt, base;
44679 bool ok;
44681 d.target = targ;
44682 d.op0 = op0;
44683 d.op1 = op1;
44684 d.vmode = GET_MODE (targ);
44685 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44686 d.one_operand_p = false;
44687 d.testing_p = false;
44689 base = high_p ? nelt / 2 : 0;
44690 for (i = 0; i < nelt / 2; ++i)
44692 d.perm[i * 2] = i + base;
44693 d.perm[i * 2 + 1] = i + base + nelt;
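/* E.g. (illustrative values) for V4SImode with HIGH_P the selector
   becomes { 2, 6, 3, 7 }, interleaving the high halves of the two
   operands; without HIGH_P it is { 0, 4, 1, 5 }. */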
44696 /* Note that for AVX this isn't one instruction. */
44697 ok = ix86_expand_vec_perm_const_1 (&d);
44698 gcc_assert (ok);
44702 /* Expand a vector operation CODE for a V*QImode in terms of the
44703 same operation on V*HImode. */
44705 void
44706 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44708 enum machine_mode qimode = GET_MODE (dest);
44709 enum machine_mode himode;
44710 rtx (*gen_il) (rtx, rtx, rtx);
44711 rtx (*gen_ih) (rtx, rtx, rtx);
44712 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44713 struct expand_vec_perm_d d;
44714 bool ok, full_interleave;
44715 bool uns_p = false;
44716 int i;
44718 switch (qimode)
44720 case V16QImode:
44721 himode = V8HImode;
44722 gen_il = gen_vec_interleave_lowv16qi;
44723 gen_ih = gen_vec_interleave_highv16qi;
44724 break;
44725 case V32QImode:
44726 himode = V16HImode;
44727 gen_il = gen_avx2_interleave_lowv32qi;
44728 gen_ih = gen_avx2_interleave_highv32qi;
44729 break;
44730 default:
44731 gcc_unreachable ();
44734 op2_l = op2_h = op2;
44735 switch (code)
44737 case MULT:
44738 /* Unpack data such that we've got a source byte in each low byte of
44739 each word. We don't care what goes into the high byte of each word.
44740 Rather than trying to get zero in there, most convenient is to let
44741 it be a copy of the low byte. */
44742 op2_l = gen_reg_rtx (qimode);
44743 op2_h = gen_reg_rtx (qimode);
44744 emit_insn (gen_il (op2_l, op2, op2));
44745 emit_insn (gen_ih (op2_h, op2, op2));
44746 /* FALLTHRU */
44748 op1_l = gen_reg_rtx (qimode);
44749 op1_h = gen_reg_rtx (qimode);
44750 emit_insn (gen_il (op1_l, op1, op1));
44751 emit_insn (gen_ih (op1_h, op1, op1));
44752 full_interleave = qimode == V16QImode;
44753 break;
44755 case ASHIFT:
44756 case LSHIFTRT:
44757 uns_p = true;
44758 /* FALLTHRU */
44759 case ASHIFTRT:
44760 op1_l = gen_reg_rtx (himode);
44761 op1_h = gen_reg_rtx (himode);
44762 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44763 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44764 full_interleave = true;
44765 break;
44766 default:
44767 gcc_unreachable ();
44770 /* Perform the operation. */
44771 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44772 1, OPTAB_DIRECT);
44773 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44774 1, OPTAB_DIRECT);
44775 gcc_assert (res_l && res_h);
44777 /* Merge the data back into the right place. */
44778 d.target = dest;
44779 d.op0 = gen_lowpart (qimode, res_l);
44780 d.op1 = gen_lowpart (qimode, res_h);
44781 d.vmode = qimode;
44782 d.nelt = GET_MODE_NUNITS (qimode);
44783 d.one_operand_p = false;
44784 d.testing_p = false;
44786 if (full_interleave)
44788 /* For SSE2, we used a full interleave, so the desired
44789 results are in the even elements. */
44790 for (i = 0; i < 32; ++i)
44791 d.perm[i] = i * 2;
44793 else
44795 /* For AVX, the interleave used above was not cross-lane. So the
44796 extraction is evens but with the second and third quarter swapped.
44797 Happily, that is even one insn shorter than even extraction. */
44798 for (i = 0; i < 32; ++i)
44799 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
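/* Sketch of the selector built above: the even elements are taken from
   res_l's low lane, then res_h's low lane, then res_l's high lane,
   then res_h's high lane, undoing the lane-local interleave. */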
44802 ok = ix86_expand_vec_perm_const_1 (&d);
44803 gcc_assert (ok);
44805 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44806 gen_rtx_fmt_ee (code, qimode, op1, op2));
44809 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44810 if op is CONST_VECTOR with all odd elements equal to their
44811 preceding element. */
44813 static bool
44814 const_vector_equal_evenodd_p (rtx op)
44816 enum machine_mode mode = GET_MODE (op);
44817 int i, nunits = GET_MODE_NUNITS (mode);
44818 if (GET_CODE (op) != CONST_VECTOR
44819 || nunits != CONST_VECTOR_NUNITS (op))
44820 return false;
44821 for (i = 0; i < nunits; i += 2)
44822 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44823 return false;
44824 return true;
44827 void
44828 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44829 bool uns_p, bool odd_p)
44831 enum machine_mode mode = GET_MODE (op1);
44832 enum machine_mode wmode = GET_MODE (dest);
44833 rtx x;
44834 rtx orig_op1 = op1, orig_op2 = op2;
44836 if (!nonimmediate_operand (op1, mode))
44837 op1 = force_reg (mode, op1);
44838 if (!nonimmediate_operand (op2, mode))
44839 op2 = force_reg (mode, op2);
44841 /* We only play even/odd games with vectors of SImode. */
44842 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44844 /* If we're looking for the odd results, shift those members down to
44845 the even slots. For some cpus this is faster than a PSHUFD. */
44846 if (odd_p)
44848 /* For XOP use vpmacsdqh, but only for smult, as it is only
44849 signed. */
44850 if (TARGET_XOP && mode == V4SImode && !uns_p)
44852 x = force_reg (wmode, CONST0_RTX (wmode));
44853 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44854 return;
44857 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44858 if (!const_vector_equal_evenodd_p (orig_op1))
44859 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44860 x, NULL, 1, OPTAB_DIRECT);
44861 if (!const_vector_equal_evenodd_p (orig_op2))
44862 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44863 x, NULL, 1, OPTAB_DIRECT);
44864 op1 = gen_lowpart (mode, op1);
44865 op2 = gen_lowpart (mode, op2);
44868 if (mode == V16SImode)
44870 if (uns_p)
44871 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44872 else
44873 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44875 else if (mode == V8SImode)
44877 if (uns_p)
44878 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44879 else
44880 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44882 else if (uns_p)
44883 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44884 else if (TARGET_SSE4_1)
44885 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44886 else
44888 rtx s1, s2, t0, t1, t2;
44890 /* The easiest way to implement this without PMULDQ is to go through
44891 the motions as if we are performing a full 64-bit multiply, with
44892 the exception that we need to do less shuffling of the elements. */
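/* Sketch of the identity used below (with UA, UB the operands viewed
   as unsigned 32-bit values and S1 = (A < 0 ? -1 : 0), S2 likewise):
     sext(A) * sext(B) mod 2^64 = UA*UB + ((S1*UB + S2*UA) << 32)
   which the code computes as t0 + ((t1 + t2) << 32). */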
44894 /* Compute the sign-extension, aka highparts, of the two operands. */
44895 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44896 op1, pc_rtx, pc_rtx);
44897 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44898 op2, pc_rtx, pc_rtx);
44900 /* Multiply LO(A) * HI(B), and vice-versa. */
44901 t1 = gen_reg_rtx (wmode);
44902 t2 = gen_reg_rtx (wmode);
44903 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44904 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44906 /* Multiply LO(A) * LO(B). */
44907 t0 = gen_reg_rtx (wmode);
44908 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44910 /* Combine and shift the highparts into place. */
44911 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44912 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44913 1, OPTAB_DIRECT);
44915 /* Combine high and low parts. */
44916 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44917 return;
44919 emit_insn (x);
44922 void
44923 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44924 bool uns_p, bool high_p)
44926 enum machine_mode wmode = GET_MODE (dest);
44927 enum machine_mode mode = GET_MODE (op1);
44928 rtx t1, t2, t3, t4, mask;
44930 switch (mode)
44932 case V4SImode:
44933 t1 = gen_reg_rtx (mode);
44934 t2 = gen_reg_rtx (mode);
44935 if (TARGET_XOP && !uns_p)
44937 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
44938 shuffle the elements once so that all elements are in the right
44939 place for immediate use: { A C B D }. */
44940 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
44941 const1_rtx, GEN_INT (3)));
44942 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
44943 const1_rtx, GEN_INT (3)));
44945 else
44947 /* Put the elements into place for the multiply. */
44948 ix86_expand_vec_interleave (t1, op1, op1, high_p);
44949 ix86_expand_vec_interleave (t2, op2, op2, high_p);
44950 high_p = false;
44952 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
44953 break;
44955 case V8SImode:
44956 /* Shuffle the elements between the lanes. After this we
44957 have { A B E F | C D G H } for each operand. */
44958 t1 = gen_reg_rtx (V4DImode);
44959 t2 = gen_reg_rtx (V4DImode);
44960 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
44961 const0_rtx, const2_rtx,
44962 const1_rtx, GEN_INT (3)));
44963 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
44964 const0_rtx, const2_rtx,
44965 const1_rtx, GEN_INT (3)));
44967 /* Shuffle the elements within the lanes. After this we
44968 have { A A B B | C C D D } or { E E F F | G G H H }. */
44969 t3 = gen_reg_rtx (V8SImode);
44970 t4 = gen_reg_rtx (V8SImode);
44971 mask = GEN_INT (high_p
44972 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
44973 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
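/* I.e. the pshufd immediate is 0xfa, selecting { 2 2 3 3 } within each
   128-bit lane for the high half, or 0x50, selecting { 0 0 1 1 } for
   the low half. */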
44974 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
44975 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
44977 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
44978 break;
44980 case V8HImode:
44981 case V16HImode:
44982 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
44983 uns_p, OPTAB_DIRECT);
44984 t2 = expand_binop (mode,
44985 uns_p ? umul_highpart_optab : smul_highpart_optab,
44986 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
44987 gcc_assert (t1 && t2);
44989 t3 = gen_reg_rtx (mode);
44990 ix86_expand_vec_interleave (t3, t1, t2, high_p);
44991 emit_move_insn (dest, gen_lowpart (wmode, t3));
44992 break;
44994 case V16QImode:
44995 case V32QImode:
44996 t1 = gen_reg_rtx (wmode);
44997 t2 = gen_reg_rtx (wmode);
44998 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
44999 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45001 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45002 break;
45004 default:
45005 gcc_unreachable ();
45009 void
45010 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45012 rtx res_1, res_2, res_3, res_4;
45014 res_1 = gen_reg_rtx (V4SImode);
45015 res_2 = gen_reg_rtx (V4SImode);
45016 res_3 = gen_reg_rtx (V2DImode);
45017 res_4 = gen_reg_rtx (V2DImode);
45018 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45019 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45021 /* Move the results in element 2 down to element 1; we don't care
45022 what goes in elements 2 and 3. Then we can merge the parts
45023 back together with an interleave.
45025 Note that two other sequences were tried:
45026 (1) Use interleaves at the start instead of psrldq, which allows
45027 us to use a single shufps to merge things back at the end.
45028 (2) Use shufps here to combine the two vectors, then pshufd to
45029 put the elements in the correct order.
45030 In both cases the cost of the reformatting stall was too high
45031 and the overall sequence slower. */
45033 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45034 const0_rtx, const2_rtx,
45035 const0_rtx, const0_rtx));
45036 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45037 const0_rtx, const2_rtx,
45038 const0_rtx, const0_rtx));
45039 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45041 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45044 void
45045 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45047 enum machine_mode mode = GET_MODE (op0);
45048 rtx t1, t2, t3, t4, t5, t6;
45050 if (TARGET_XOP && mode == V2DImode)
45052 /* op1: A,B,C,D, op2: E,F,G,H */
45053 op1 = gen_lowpart (V4SImode, op1);
45054 op2 = gen_lowpart (V4SImode, op2);
45056 t1 = gen_reg_rtx (V4SImode);
45057 t2 = gen_reg_rtx (V4SImode);
45058 t3 = gen_reg_rtx (V2DImode);
45059 t4 = gen_reg_rtx (V2DImode);
45061 /* t1: B,A,D,C */
45062 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45063 GEN_INT (1),
45064 GEN_INT (0),
45065 GEN_INT (3),
45066 GEN_INT (2)));
45068 /* t2: (B*E),(A*F),(D*G),(C*H) */
45069 emit_insn (gen_mulv4si3 (t2, t1, op2));
45071 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45072 emit_insn (gen_xop_phadddq (t3, t2));
45074 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45075 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45077 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45078 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45080 else
45082 enum machine_mode nmode;
45083 rtx (*umul) (rtx, rtx, rtx);
45085 if (mode == V2DImode)
45087 umul = gen_vec_widen_umult_even_v4si;
45088 nmode = V4SImode;
45090 else if (mode == V4DImode)
45092 umul = gen_vec_widen_umult_even_v8si;
45093 nmode = V8SImode;
45095 else if (mode == V8DImode)
45097 umul = gen_vec_widen_umult_even_v16si;
45098 nmode = V16SImode;
45100 else
45101 gcc_unreachable ();
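/* The steps below follow the usual decomposition into 32-bit halves
   (a sketch, writing each element as LO + 2^32 * HI):
     A*B mod 2^64 = LO(A)*LO(B) + ((HI(A)*LO(B) + HI(B)*LO(A)) << 32). */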
45104 /* Multiply low parts. */
45105 t1 = gen_reg_rtx (mode);
45106 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45108 /* Shift input vectors right 32 bits so we can multiply high parts. */
45109 t6 = GEN_INT (32);
45110 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45111 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45113 /* Multiply high parts by low parts. */
45114 t4 = gen_reg_rtx (mode);
45115 t5 = gen_reg_rtx (mode);
45116 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45117 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45119 /* Combine and shift the highparts back. */
45120 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45121 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45123 /* Combine high and low parts. */
45124 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45127 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45128 gen_rtx_MULT (mode, op1, op2));
45131 /* Calculate integer abs() using only SSE2 instructions. */
45133 void
45134 ix86_expand_sse2_abs (rtx target, rtx input)
45136 enum machine_mode mode = GET_MODE (target);
45137 rtx tmp0, tmp1, x;
45139 switch (mode)
45141 /* For 32-bit signed integer X, the best way to calculate the absolute
45142 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45143 case V4SImode:
45144 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45145 GEN_INT (GET_MODE_BITSIZE
45146 (GET_MODE_INNER (mode)) - 1),
45147 NULL, 0, OPTAB_DIRECT);
45148 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45149 NULL, 0, OPTAB_DIRECT);
45150 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45151 target, 0, OPTAB_DIRECT);
45152 break;
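/* Worked example (illustrative): for X = -5 the arithmetic shift gives
   -1, (-5 ^ -1) = 4 and 4 - (-1) = 5; for non-negative X the shift
   gives 0 and X is returned unchanged. */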
45154 /* For 16-bit signed integer X, the best way to calculate the absolute
45155 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45156 case V8HImode:
45157 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45159 x = expand_simple_binop (mode, SMAX, tmp0, input,
45160 target, 0, OPTAB_DIRECT);
45161 break;
45163 /* For 8-bit signed integer X, the best way to calculate the absolute
45164 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45165 as SSE2 provides the PMINUB insn. */
45166 case V16QImode:
45167 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45169 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45170 target, 0, OPTAB_DIRECT);
45171 break;
45173 default:
45174 gcc_unreachable ();
45177 if (x != target)
45178 emit_move_insn (target, x);
45181 /* Expand an insert into a vector register through pinsr insn.
45182 Return true if successful. */
45184 bool
45185 ix86_expand_pinsr (rtx *operands)
45187 rtx dst = operands[0];
45188 rtx src = operands[3];
45190 unsigned int size = INTVAL (operands[1]);
45191 unsigned int pos = INTVAL (operands[2]);
45193 if (GET_CODE (dst) == SUBREG)
45195 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45196 dst = SUBREG_REG (dst);
45199 if (GET_CODE (src) == SUBREG)
45200 src = SUBREG_REG (src);
45202 switch (GET_MODE (dst))
45204 case V16QImode:
45205 case V8HImode:
45206 case V4SImode:
45207 case V2DImode:
45209 enum machine_mode srcmode, dstmode;
45210 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45212 srcmode = mode_for_size (size, MODE_INT, 0);
45214 switch (srcmode)
45216 case QImode:
45217 if (!TARGET_SSE4_1)
45218 return false;
45219 dstmode = V16QImode;
45220 pinsr = gen_sse4_1_pinsrb;
45221 break;
45223 case HImode:
45224 if (!TARGET_SSE2)
45225 return false;
45226 dstmode = V8HImode;
45227 pinsr = gen_sse2_pinsrw;
45228 break;
45230 case SImode:
45231 if (!TARGET_SSE4_1)
45232 return false;
45233 dstmode = V4SImode;
45234 pinsr = gen_sse4_1_pinsrd;
45235 break;
45237 case DImode:
45238 gcc_assert (TARGET_64BIT);
45239 if (!TARGET_SSE4_1)
45240 return false;
45241 dstmode = V2DImode;
45242 pinsr = gen_sse4_1_pinsrq;
45243 break;
45245 default:
45246 return false;
45249 rtx d = dst;
45250 if (GET_MODE (dst) != dstmode)
45251 d = gen_reg_rtx (dstmode);
45252 src = gen_lowpart (srcmode, src);
45254 pos /= size;
45256 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45257 GEN_INT (1 << pos)));
45258 if (d != dst)
45259 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45260 return true;
45263 default:
45264 return false;
45268 /* This function returns the calling abi specific va_list type node.
45269 It returns the FNDECL specific va_list type. */
45271 static tree
45272 ix86_fn_abi_va_list (tree fndecl)
45274 if (!TARGET_64BIT)
45275 return va_list_type_node;
45276 gcc_assert (fndecl != NULL_TREE);
45278 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45279 return ms_va_list_type_node;
45280 else
45281 return sysv_va_list_type_node;
45284 /* Returns the canonical va_list type specified by TYPE. If there
45285 is no valid TYPE provided, it returns NULL_TREE. */
45287 static tree
45288 ix86_canonical_va_list_type (tree type)
45290 tree wtype, htype;
45292 /* Resolve references and pointers to va_list type. */
45293 if (TREE_CODE (type) == MEM_REF)
45294 type = TREE_TYPE (type);
45295 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45296 type = TREE_TYPE (type);
45297 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45298 type = TREE_TYPE (type);
45300 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45302 wtype = va_list_type_node;
45303 gcc_assert (wtype != NULL_TREE);
45304 htype = type;
45305 if (TREE_CODE (wtype) == ARRAY_TYPE)
45307 /* If va_list is an array type, the argument may have decayed
45308 to a pointer type, e.g. by being passed to another function.
45309 In that case, unwrap both types so that we can compare the
45310 underlying records. */
45311 if (TREE_CODE (htype) == ARRAY_TYPE
45312 || POINTER_TYPE_P (htype))
45314 wtype = TREE_TYPE (wtype);
45315 htype = TREE_TYPE (htype);
45318 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45319 return va_list_type_node;
45320 wtype = sysv_va_list_type_node;
45321 gcc_assert (wtype != NULL_TREE);
45322 htype = type;
45323 if (TREE_CODE (wtype) == ARRAY_TYPE)
45325 /* If va_list is an array type, the argument may have decayed
45326 to a pointer type, e.g. by being passed to another function.
45327 In that case, unwrap both types so that we can compare the
45328 underlying records. */
45329 if (TREE_CODE (htype) == ARRAY_TYPE
45330 || POINTER_TYPE_P (htype))
45332 wtype = TREE_TYPE (wtype);
45333 htype = TREE_TYPE (htype);
45336 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45337 return sysv_va_list_type_node;
45338 wtype = ms_va_list_type_node;
45339 gcc_assert (wtype != NULL_TREE);
45340 htype = type;
45341 if (TREE_CODE (wtype) == ARRAY_TYPE)
45343 /* If va_list is an array type, the argument may have decayed
45344 to a pointer type, e.g. by being passed to another function.
45345 In that case, unwrap both types so that we can compare the
45346 underlying records. */
45347 if (TREE_CODE (htype) == ARRAY_TYPE
45348 || POINTER_TYPE_P (htype))
45350 wtype = TREE_TYPE (wtype);
45351 htype = TREE_TYPE (htype);
45354 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45355 return ms_va_list_type_node;
45356 return NULL_TREE;
45358 return std_canonical_va_list_type (type);
45361 /* Iterate through the target-specific builtin types for va_list.
45362 IDX denotes the iterator, *PTREE is set to the result type of
45363 the va_list builtin, and *PNAME to its internal type.
45364 Returns zero if there is no element for this index, otherwise
45365 IDX should be increased upon the next call.
45366 Note, do not iterate a base builtin's name like __builtin_va_list.
45367 Used from c_common_nodes_and_builtins. */
45369 static int
45370 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45372 if (TARGET_64BIT)
45374 switch (idx)
45376 default:
45377 break;
45379 case 0:
45380 *ptree = ms_va_list_type_node;
45381 *pname = "__builtin_ms_va_list";
45382 return 1;
45384 case 1:
45385 *ptree = sysv_va_list_type_node;
45386 *pname = "__builtin_sysv_va_list";
45387 return 1;
45391 return 0;
45394 #undef TARGET_SCHED_DISPATCH
45395 #define TARGET_SCHED_DISPATCH has_dispatch
45396 #undef TARGET_SCHED_DISPATCH_DO
45397 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45398 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45399 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45400 #undef TARGET_SCHED_REORDER
45401 #define TARGET_SCHED_REORDER ix86_sched_reorder
45402 #undef TARGET_SCHED_ADJUST_PRIORITY
45403 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45404 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45405 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45406 ix86_dependencies_evaluation_hook
45408 /* The size of the dispatch window is the total number of bytes of
45409 object code allowed in a window. */
45410 #define DISPATCH_WINDOW_SIZE 16
45412 /* Number of dispatch windows considered for scheduling. */
45413 #define MAX_DISPATCH_WINDOWS 3
45415 /* Maximum number of instructions in a window. */
45416 #define MAX_INSN 4
45418 /* Maximum number of immediate operands in a window. */
45419 #define MAX_IMM 4
45421 /* Maximum number of immediate bits allowed in a window. */
45422 #define MAX_IMM_SIZE 128
45424 /* Maximum number of 32 bit immediates allowed in a window. */
45425 #define MAX_IMM_32 4
45427 /* Maximum number of 64 bit immediates allowed in a window. */
45428 #define MAX_IMM_64 2
45430 /* Maximum total of loads or prefetches allowed in a window. */
45431 #define MAX_LOAD 2
45433 /* Maximum total of stores allowed in a window. */
45434 #define MAX_STORE 1
45436 #undef BIG
45437 #define BIG 100
45440 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45441 enum dispatch_group {
45442 disp_no_group = 0,
45443 disp_load,
45444 disp_store,
45445 disp_load_store,
45446 disp_prefetch,
45447 disp_imm,
45448 disp_imm_32,
45449 disp_imm_64,
45450 disp_branch,
45451 disp_cmp,
45452 disp_jcc,
45453 disp_last
45456 /* Number of allowable groups in a dispatch window. It is an array
45457 indexed by dispatch_group enum. 100 is used as a big number,
45458 because the number of these kinds of operations does not have any
45459 effect in a dispatch window, but we need them for other reasons in
45460 the table. */
45461 static unsigned int num_allowable_groups[disp_last] = {
45462 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45465 char group_name[disp_last + 1][16] = {
45466 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45467 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45468 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45471 /* Instruction path. */
45472 enum insn_path {
45473 no_path = 0,
45474 path_single, /* Single micro op. */
45475 path_double, /* Double micro op. */
45476 path_multi, /* Instructions with more than 2 micro ops. */
45477 last_path
45480 /* sched_insn_info defines a window to the instructions scheduled in
45481 the basic block. It contains a pointer to the insn_info table and
45482 the instruction scheduled.
45484 Windows are allocated for each basic block and are linked
45485 together. */
45486 typedef struct sched_insn_info_s {
45487 rtx insn;
45488 enum dispatch_group group;
45489 enum insn_path path;
45490 int byte_len;
45491 int imm_bytes;
45492 } sched_insn_info;
45494 /* Linked list of dispatch windows. This is a two-way list of
45495 dispatch windows of a basic block. It contains information about
45496 the number of uops in the window and the total number of
45497 instructions and of bytes in the object code for this dispatch
45498 window. */
45499 typedef struct dispatch_windows_s {
45500 int num_insn; /* Number of insn in the window. */
45501 int num_uops; /* Number of uops in the window. */
45502 int window_size; /* Number of bytes in the window. */
45503 int window_num; /* Window number, either 0 or 1. */
45504 int num_imm; /* Number of immediates in an insn. */
45505 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45506 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45507 int imm_size; /* Total immediates in the window. */
45508 int num_loads; /* Total memory loads in the window. */
45509 int num_stores; /* Total memory stores in the window. */
45510 int violation; /* Violation exists in window. */
45511 sched_insn_info *window; /* Pointer to the window. */
45512 struct dispatch_windows_s *next;
45513 struct dispatch_windows_s *prev;
45514 } dispatch_windows;
45516 /* Immediate valuse used in an insn. */
45517 typedef struct imm_info_s
45519 int imm;
45520 int imm32;
45521 int imm64;
45522 } imm_info;
45524 static dispatch_windows *dispatch_window_list;
45525 static dispatch_windows *dispatch_window_list1;
45527 /* Get dispatch group of insn. */
45529 static enum dispatch_group
45530 get_mem_group (rtx insn)
45532 enum attr_memory memory;
45534 if (INSN_CODE (insn) < 0)
45535 return disp_no_group;
45536 memory = get_attr_memory (insn);
45537 if (memory == MEMORY_STORE)
45538 return disp_store;
45540 if (memory == MEMORY_LOAD)
45541 return disp_load;
45543 if (memory == MEMORY_BOTH)
45544 return disp_load_store;
45546 return disp_no_group;
45549 /* Return true if insn is a compare instruction. */
45551 static bool
45552 is_cmp (rtx insn)
45554 enum attr_type type;
45556 type = get_attr_type (insn);
45557 return (type == TYPE_TEST
45558 || type == TYPE_ICMP
45559 || type == TYPE_FCMP
45560 || GET_CODE (PATTERN (insn)) == COMPARE);
45563 /* Return true if a dispatch violation was encountered. */
45565 static bool
45566 dispatch_violation (void)
45568 if (dispatch_window_list->next)
45569 return dispatch_window_list->next->violation;
45570 return dispatch_window_list->violation;
45573 /* Return true if insn is a branch instruction. */
45575 static bool
45576 is_branch (rtx insn)
45578 return (CALL_P (insn) || JUMP_P (insn));
45581 /* Return true if insn is a prefetch instruction. */
45583 static bool
45584 is_prefetch (rtx insn)
45586 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45589 /* This function initializes a dispatch window and the list container holding a
45590 pointer to the window. */
45592 static void
45593 init_window (int window_num)
45595 int i;
45596 dispatch_windows *new_list;
45598 if (window_num == 0)
45599 new_list = dispatch_window_list;
45600 else
45601 new_list = dispatch_window_list1;
45603 new_list->num_insn = 0;
45604 new_list->num_uops = 0;
45605 new_list->window_size = 0;
45606 new_list->next = NULL;
45607 new_list->prev = NULL;
45608 new_list->window_num = window_num;
45609 new_list->num_imm = 0;
45610 new_list->num_imm_32 = 0;
45611 new_list->num_imm_64 = 0;
45612 new_list->imm_size = 0;
45613 new_list->num_loads = 0;
45614 new_list->num_stores = 0;
45615 new_list->violation = false;
45617 for (i = 0; i < MAX_INSN; i++)
45619 new_list->window[i].insn = NULL;
45620 new_list->window[i].group = disp_no_group;
45621 new_list->window[i].path = no_path;
45622 new_list->window[i].byte_len = 0;
45623 new_list->window[i].imm_bytes = 0;
45625 return;
45628 /* This function allocates and initializes a dispatch window and the
45629 list container holding a pointer to the window. */
45631 static dispatch_windows *
45632 allocate_window (void)
45634 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45635 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45637 return new_list;
45640 /* This routine initializes the dispatch scheduling information. It
45641 initiates building dispatch scheduler tables and constructs the
45642 first dispatch window. */
45644 static void
45645 init_dispatch_sched (void)
45647 /* Allocate a dispatch list and a window. */
45648 dispatch_window_list = allocate_window ();
45649 dispatch_window_list1 = allocate_window ();
45650 init_window (0);
45651 init_window (1);
45654 /* This function returns true if a branch is detected. End of a basic block
45655 does not have to be a branch, but here we assume only branches end a
45656 window. */
45658 static bool
45659 is_end_basic_block (enum dispatch_group group)
45661 return group == disp_branch;
45664 /* This function is called when the end of a window's processing is reached. */
45666 static void
45667 process_end_window (void)
45669 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45670 if (dispatch_window_list->next)
45672 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45673 gcc_assert (dispatch_window_list->window_size
45674 + dispatch_window_list1->window_size <= 48);
45675 init_window (1);
45677 init_window (0);
45680 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45681 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45682 for 48 bytes of instructions. Note that these windows are not dispatch
45683 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45685 static dispatch_windows *
45686 allocate_next_window (int window_num)
45688 if (window_num == 0)
45690 if (dispatch_window_list->next)
45691 init_window (1);
45692 init_window (0);
45693 return dispatch_window_list;
45696 dispatch_window_list->next = dispatch_window_list1;
45697 dispatch_window_list1->prev = dispatch_window_list;
45699 return dispatch_window_list1;
45702 /* Increment the number of immediate operands of an instruction. */
45704 static int
45705 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45707 if (*in_rtx == 0)
45708 return 0;
45710 switch ( GET_CODE (*in_rtx))
45712 case CONST:
45713 case SYMBOL_REF:
45714 case CONST_INT:
45715 (imm_values->imm)++;
45716 if (x86_64_immediate_operand (*in_rtx, SImode))
45717 (imm_values->imm32)++;
45718 else
45719 (imm_values->imm64)++;
45720 break;
45722 case CONST_DOUBLE:
45723 (imm_values->imm)++;
45724 (imm_values->imm64)++;
45725 break;
45727 case CODE_LABEL:
45728 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45730 (imm_values->imm)++;
45731 (imm_values->imm32)++;
45733 break;
45735 default:
45736 break;
45739 return 0;
45742 /* Compute number of immediate operands of an instruction. */
45744 static void
45745 find_constant (rtx in_rtx, imm_info *imm_values)
45747 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45748 (rtx_function) find_constant_1, (void *) imm_values);
45751 /* Return total size of immediate operands of an instruction along with number
45752 of corresponding immediate-operands. It initializes its parameters to zero
45753 before calling FIND_CONSTANT.
45754 INSN is the input instruction. IMM is the total of immediates.
45755 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45756 bit immediates. */
45758 static int
45759 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45761 imm_info imm_values = {0, 0, 0};
45763 find_constant (insn, &imm_values);
45764 *imm = imm_values.imm;
45765 *imm32 = imm_values.imm32;
45766 *imm64 = imm_values.imm64;
45767 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
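/* For example (hypothetical insn): one 32-bit and one 64-bit immediate
   operand yield *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a returned size
   of 12 bytes. */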
45770 /* This function indicates if an operand of an instruction is an
45771 immediate. */
45773 static bool
45774 has_immediate (rtx insn)
45776 int num_imm_operand;
45777 int num_imm32_operand;
45778 int num_imm64_operand;
45780 if (insn)
45781 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45782 &num_imm64_operand);
45783 return false;
45786 /* Return single or double path for instructions. */
45788 static enum insn_path
45789 get_insn_path (rtx insn)
45791 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45793 if ((int)path == 0)
45794 return path_single;
45796 if ((int)path == 1)
45797 return path_double;
45799 return path_multi;
45802 /* Return insn dispatch group. */
45804 static enum dispatch_group
45805 get_insn_group (rtx insn)
45807 enum dispatch_group group = get_mem_group (insn);
45808 if (group)
45809 return group;
45811 if (is_branch (insn))
45812 return disp_branch;
45814 if (is_cmp (insn))
45815 return disp_cmp;
45817 if (has_immediate (insn))
45818 return disp_imm;
45820 if (is_prefetch (insn))
45821 return disp_prefetch;
45823 return disp_no_group;
45826 /* Count number of GROUP restricted instructions in a dispatch
45827 window WINDOW_LIST. */
45829 static int
45830 count_num_restricted (rtx insn, dispatch_windows *window_list)
45832 enum dispatch_group group = get_insn_group (insn);
45833 int imm_size;
45834 int num_imm_operand;
45835 int num_imm32_operand;
45836 int num_imm64_operand;
45838 if (group == disp_no_group)
45839 return 0;
45841 if (group == disp_imm)
45843 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45844 &num_imm64_operand);
45845 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45846 || num_imm_operand + window_list->num_imm > MAX_IMM
45847 || (num_imm32_operand > 0
45848 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45849 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45850 || (num_imm64_operand > 0
45851 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45852 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45853 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45854 && num_imm64_operand > 0
45855 && ((window_list->num_imm_64 > 0
45856 && window_list->num_insn >= 2)
45857 || window_list->num_insn >= 3)))
45858 return BIG;
45860 return 1;
45863 if ((group == disp_load_store
45864 && (window_list->num_loads >= MAX_LOAD
45865 || window_list->num_stores >= MAX_STORE))
45866 || ((group == disp_load
45867 || group == disp_prefetch)
45868 && window_list->num_loads >= MAX_LOAD)
45869 || (group == disp_store
45870 && window_list->num_stores >= MAX_STORE))
45871 return BIG;
45873 return 1;
45876 /* This function returns true if insn satisfies dispatch rules on the
45877 last window scheduled. */
45879 static bool
45880 fits_dispatch_window (rtx insn)
45882 dispatch_windows *window_list = dispatch_window_list;
45883 dispatch_windows *window_list_next = dispatch_window_list->next;
45884 unsigned int num_restrict;
45885 enum dispatch_group group = get_insn_group (insn);
45886 enum insn_path path = get_insn_path (insn);
45887 int sum;
45889 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45890 instructions should be given the lowest priority in the
45891 scheduling process in the Haifa scheduler to make sure they will be
45892 scheduled in the same dispatch window as the reference to them. */
45893 if (group == disp_jcc || group == disp_cmp)
45894 return false;
45896 /* Check nonrestricted. */
45897 if (group == disp_no_group || group == disp_branch)
45898 return true;
45900 /* Get last dispatch window. */
45901 if (window_list_next)
45902 window_list = window_list_next;
45904 if (window_list->window_num == 1)
45906 sum = window_list->prev->window_size + window_list->window_size;
45908 if (sum == 32
45909 || (min_insn_size (insn) + sum) >= 48)
45910 /* Window 1 is full. Go for next window. */
45911 return true;
45914 num_restrict = count_num_restricted (insn, window_list);
45916 if (num_restrict > num_allowable_groups[group])
45917 return false;
45919 /* See if it fits in the first window. */
45920 if (window_list->window_num == 0)
45922 /* The first window should have only single and double path
45923 uops. */
45924 if (path == path_double
45925 && (window_list->num_uops + 2) > MAX_INSN)
45926 return false;
45927 else if (path != path_single)
45928 return false;
45930 return true;
45933 /* Add an instruction INSN with NUM_UOPS micro-operations to the
45934 dispatch window WINDOW_LIST. */
45936 static void
45937 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
45939 int byte_len = min_insn_size (insn);
45940 int num_insn = window_list->num_insn;
45941 int imm_size;
45942 sched_insn_info *window = window_list->window;
45943 enum dispatch_group group = get_insn_group (insn);
45944 enum insn_path path = get_insn_path (insn);
45945 int num_imm_operand;
45946 int num_imm32_operand;
45947 int num_imm64_operand;
45949 if (!window_list->violation && group != disp_cmp
45950 && !fits_dispatch_window (insn))
45951 window_list->violation = true;
45953 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45954 &num_imm64_operand);
45956 /* Initialize window with new instruction. */
45957 window[num_insn].insn = insn;
45958 window[num_insn].byte_len = byte_len;
45959 window[num_insn].group = group;
45960 window[num_insn].path = path;
45961 window[num_insn].imm_bytes = imm_size;
45963 window_list->window_size += byte_len;
45964 window_list->num_insn = num_insn + 1;
45965 window_list->num_uops = window_list->num_uops + num_uops;
45966 window_list->imm_size += imm_size;
45967 window_list->num_imm += num_imm_operand;
45968 window_list->num_imm_32 += num_imm32_operand;
45969 window_list->num_imm_64 += num_imm64_operand;
45971 if (group == disp_store)
45972 window_list->num_stores += 1;
45973 else if (group == disp_load
45974 || group == disp_prefetch)
45975 window_list->num_loads += 1;
45976 else if (group == disp_load_store)
45978 window_list->num_stores += 1;
45979 window_list->num_loads += 1;
45983 /* Adds a scheduled instruction, INSN, to the current dispatch window.
45984 If the total bytes of instructions or the number of instructions in
45985 the window exceeds the allowable limit, it allocates a new window. */
45987 static void
45988 add_to_dispatch_window (rtx insn)
45990 int byte_len;
45991 dispatch_windows *window_list;
45992 dispatch_windows *next_list;
45993 dispatch_windows *window0_list;
45994 enum insn_path path;
45995 enum dispatch_group insn_group;
45996 bool insn_fits;
45997 int num_insn;
45998 int num_uops;
45999 int window_num;
46000 int insn_num_uops;
46001 int sum;
46003 if (INSN_CODE (insn) < 0)
46004 return;
46006 byte_len = min_insn_size (insn);
46007 window_list = dispatch_window_list;
46008 next_list = window_list->next;
46009 path = get_insn_path (insn);
46010 insn_group = get_insn_group (insn);
46012 /* Get the last dispatch window. */
46013 if (next_list)
46014 window_list = dispatch_window_list->next;
46016 if (path == path_single)
46017 insn_num_uops = 1;
46018 else if (path == path_double)
46019 insn_num_uops = 2;
46020 else
46021 insn_num_uops = (int) path;
46023 /* If the current window is full, get a new window.
46024 Window zero is full if MAX_INSN uops are scheduled in it.
46025 Window one is full if the bytes of window zero plus the bytes of
46026 window one equal 32, if adding the new instruction's bytes would
46027 bring the total to 48 or more, or if window one already holds
46028 MAX_INSN instructions. */
46029 num_insn = window_list->num_insn;
46030 num_uops = window_list->num_uops;
46031 window_num = window_list->window_num;
46032 insn_fits = fits_dispatch_window (insn);
46034 if (num_insn >= MAX_INSN
46035 || num_uops + insn_num_uops > MAX_INSN
46036 || !(insn_fits))
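/* Toggle WINDOW_NUM between 0 and 1 and start filling the other window. */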
46038 window_num = ~window_num & 1;
46039 window_list = allocate_next_window (window_num);
46042 if (window_num == 0)
46044 add_insn_window (insn, window_list, insn_num_uops);
46045 if (window_list->num_insn >= MAX_INSN
46046 && insn_group == disp_branch)
46048 process_end_window ();
46049 return;
46052 else if (window_num == 1)
46054 window0_list = window_list->prev;
46055 sum = window0_list->window_size + window_list->window_size;
46056 if (sum == 32
46057 || (byte_len + sum) >= 48)
46059 process_end_window ();
46060 window_list = dispatch_window_list;
46063 add_insn_window (insn, window_list, insn_num_uops);
46065 else
46066 gcc_unreachable ();
46068 if (is_end_basic_block (insn_group))
46070 /* The end of the basic block has been reached; do end-of-basic-block processing. */
46071 process_end_window ();
46072 return;
46076 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46078 DEBUG_FUNCTION static void
46079 debug_dispatch_window_file (FILE *file, int window_num)
46081 dispatch_windows *list;
46082 int i;
46084 if (window_num == 0)
46085 list = dispatch_window_list;
46086 else
46087 list = dispatch_window_list1;
46089 fprintf (file, "Window #%d:\n", list->window_num);
46090 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46091 list->num_insn, list->num_uops, list->window_size);
46092 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46093 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46095 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46096 list->num_stores);
46097 fprintf (file, " insn info:\n");
46099 for (i = 0; i < MAX_INSN; i++)
46101 if (!list->window[i].insn)
46102 break;
46103 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46104 i, group_name[list->window[i].group],
46105 i, (void *)list->window[i].insn,
46106 i, list->window[i].path,
46107 i, list->window[i].byte_len,
46108 i, list->window[i].imm_bytes);
46112 /* Print to stdout a dispatch window. */
46114 DEBUG_FUNCTION void
46115 debug_dispatch_window (int window_num)
46117 debug_dispatch_window_file (stdout, window_num);
46120 /* Print INSN dispatch information to FILE. */
46122 DEBUG_FUNCTION static void
46123 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46125 int byte_len;
46126 enum insn_path path;
46127 enum dispatch_group group;
46128 int imm_size;
46129 int num_imm_operand;
46130 int num_imm32_operand;
46131 int num_imm64_operand;
46133 if (INSN_CODE (insn) < 0)
46134 return;
46136 byte_len = min_insn_size (insn);
46137 path = get_insn_path (insn);
46138 group = get_insn_group (insn);
46139 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46140 &num_imm64_operand);
46142 fprintf (file, " insn info:\n");
46143 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46144 group_name[group], path, byte_len);
46145 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46146 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46149 /* Print to STDOUT the status of the ready list with respect to
46150 dispatch windows. */
46152 DEBUG_FUNCTION void
46153 debug_ready_dispatch (void)
46155 int i;
46156 int no_ready = number_in_ready ();
46158 fprintf (stdout, "Number of ready: %d\n", no_ready);
46160 for (i = 0; i < no_ready; i++)
46161 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46164 /* This routine is the driver of the dispatch scheduler. */
46166 static void
46167 do_dispatch (rtx insn, int mode)
46169 if (mode == DISPATCH_INIT)
46170 init_dispatch_sched ();
46171 else if (mode == ADD_TO_DISPATCH_WINDOW)
46172 add_to_dispatch_window (insn);
46175 /* Answer the dispatch-scheduler query ACTION for INSN; return FALSE if dispatch scheduling is not enabled. */
46177 static bool
46178 has_dispatch (rtx insn, int action)
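/* Dispatch scheduling is considered only for the Bulldozer family (bdver1
through bdver4) and only when it has been requested via
flag_dispatch_scheduler. */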
46180 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46181 && flag_dispatch_scheduler)
46182 switch (action)
46184 default:
46185 return false;
46187 case IS_DISPATCH_ON:
46188 return true;
46189 break;
46191 case IS_CMP:
46192 return is_cmp (insn);
46194 case DISPATCH_VIOLATION:
46195 return dispatch_violation ();
46197 case FITS_DISPATCH_WINDOW:
46198 return fits_dispatch_window (insn);
46201 return false;
46204 /* Implementation of the reassociation_width target hook, used by the
46205 reassoc pass to identify the level of parallelism in a reassociated
46206 tree. The statement's tree_code is passed in OPC; the type of the
46207 arguments is passed in MODE.
46209 Currently parallel reassociation is enabled only for Atom
46210 processors, and we set the reassociation width to 2 because Atom
46211 may issue up to 2 instructions per cycle.
46213 The return value should be revisited if parallel reassociation is
46214 enabled for other processors. */
46216 static int
46217 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46218 enum machine_mode mode)
46220 int res = 1;
46222 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46223 res = 2;
46224 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46225 res = 2;
46227 return res;
46230 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46231 place emms and femms instructions. */
46233 static enum machine_mode
46234 ix86_preferred_simd_mode (enum machine_mode mode)
46236 if (!TARGET_SSE)
46237 return word_mode;
46239 switch (mode)
46241 case QImode:
46242 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46243 case HImode:
46244 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46245 case SImode:
46246 return TARGET_AVX512F ? V16SImode :
46247 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46248 case DImode:
46249 return TARGET_AVX512F ? V8DImode :
46250 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46252 case SFmode:
46253 if (TARGET_AVX512F)
46254 return V16SFmode;
46255 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46256 return V8SFmode;
46257 else
46258 return V4SFmode;
46260 case DFmode:
46261 if (!TARGET_VECTORIZE_DOUBLE)
46262 return word_mode;
46263 else if (TARGET_AVX512F)
46264 return V8DFmode;
46265 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46266 return V4DFmode;
46267 else if (TARGET_SSE2)
46268 return V2DFmode;
46269 /* FALLTHRU */
46271 default:
46272 return word_mode;
46276 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46277 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46278 256bit and 128bit vectors. */
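/* The returned value is a bit mask of candidate vector sizes in bytes, so
64 | 32 | 16 means try 512-bit, 256-bit and 128-bit vectors in turn. */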
46280 static unsigned int
46281 ix86_autovectorize_vector_sizes (void)
46283 return TARGET_AVX512F ? 64 | 32 | 16 :
46284 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46289 /* Return the class of registers that could be used to spill a pseudo
46290 of mode MODE and class RCLASS instead of memory. Return NO_REGS
46291 if spilling to registers is not possible or not profitable. */
46292 static reg_class_t
46293 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46295 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46296 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46297 && INTEGER_CLASS_P (rclass))
46298 return ALL_SSE_REGS;
46299 return NO_REGS;
46302 /* Implement targetm.vectorize.init_cost. */
46304 static void *
46305 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46307 unsigned *cost = XNEWVEC (unsigned, 3);
46308 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46309 return cost;
46312 /* Implement targetm.vectorize.add_stmt_cost. */
46314 static unsigned
46315 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46316 struct _stmt_vec_info *stmt_info, int misalign,
46317 enum vect_cost_model_location where)
46319 unsigned *cost = (unsigned *) data;
46320 unsigned retval = 0;
46322 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46323 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46325 /* Statements in an inner loop relative to the loop being
46326 vectorized are weighted more heavily. The value here is
46327 arbitrary and could potentially be improved with analysis. */
46328 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46329 count *= 50; /* FIXME. */
46331 retval = (unsigned) (count * stmt_cost);
46332 cost[where] += retval;
46334 return retval;
46337 /* Implement targetm.vectorize.finish_cost. */
46339 static void
46340 ix86_finish_cost (void *data, unsigned *prologue_cost,
46341 unsigned *body_cost, unsigned *epilogue_cost)
46343 unsigned *cost = (unsigned *) data;
46344 *prologue_cost = cost[vect_prologue];
46345 *body_cost = cost[vect_body];
46346 *epilogue_cost = cost[vect_epilogue];
46349 /* Implement targetm.vectorize.destroy_cost_data. */
46351 static void
46352 ix86_destroy_cost_data (void *data)
46354 free (data);
46357 /* Validate target specific memory model bits in VAL. */
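/* VAL is a C11/C++11 memory model possibly combined with the target HLE
bits, e.g. (illustrative) __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE passed
to an atomic built-in. */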
46359 static unsigned HOST_WIDE_INT
46360 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46362 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46363 bool strong;
46365 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46366 |MEMMODEL_MASK)
46367 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46369 warning (OPT_Winvalid_memory_model,
46370 "Unknown architecture specific memory model");
46371 return MEMMODEL_SEQ_CST;
46373 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46374 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46376 warning (OPT_Winvalid_memory_model,
46377 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46378 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46380 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46382 warning (OPT_Winvalid_memory_model,
46383 "HLE_RELEASE not used with RELEASE or stronger memory model");
46384 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46386 return val;
46389 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46390 CLONEI->vecsize_float and, if CLONEI->simdlen is 0, also
46391 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46392 or the number of vecsize_mangle variants that should be emitted. */
46394 static int
46395 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46396 struct cgraph_simd_clone *clonei,
46397 tree base_type, int num)
46399 int ret = 1;
46401 if (clonei->simdlen
46402 && (clonei->simdlen < 2
46403 || clonei->simdlen > 16
46404 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46406 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46407 "unsupported simdlen %d", clonei->simdlen);
46408 return 0;
46411 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46412 if (TREE_CODE (ret_type) != VOID_TYPE)
46413 switch (TYPE_MODE (ret_type))
46415 case QImode:
46416 case HImode:
46417 case SImode:
46418 case DImode:
46419 case SFmode:
46420 case DFmode:
46421 /* case SCmode: */
46422 /* case DCmode: */
46423 break;
46424 default:
46425 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46426 "unsupported return type %qT for simd\n", ret_type);
46427 return 0;
46430 tree t;
46431 int i;
46433 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46434 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46435 switch (TYPE_MODE (TREE_TYPE (t)))
46437 case QImode:
46438 case HImode:
46439 case SImode:
46440 case DImode:
46441 case SFmode:
46442 case DFmode:
46443 /* case SCmode: */
46444 /* case DCmode: */
46445 break;
46446 default:
46447 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46448 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46449 return 0;
46452 if (clonei->cilk_elemental)
46454 /* Parse the processor clause here. If it is not present, default to 'b'. */
46455 clonei->vecsize_mangle = 'b';
46457 else if (!TREE_PUBLIC (node->decl))
46459 /* If the function isn't exported, we can pick up just one ISA
46460 for the clones. */
46461 if (TARGET_AVX2)
46462 clonei->vecsize_mangle = 'd';
46463 else if (TARGET_AVX)
46464 clonei->vecsize_mangle = 'c';
46465 else
46466 clonei->vecsize_mangle = 'b';
46467 ret = 1;
46469 else
46471 clonei->vecsize_mangle = "bcd"[num];
46472 ret = 3;
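/* Mangling letters 'b', 'c' and 'd' select the SSE2, AVX and AVX2 clone
variants respectively (see ix86_simd_clone_adjust below). */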
46474 switch (clonei->vecsize_mangle)
46476 case 'b':
46477 clonei->vecsize_int = 128;
46478 clonei->vecsize_float = 128;
46479 break;
46480 case 'c':
46481 clonei->vecsize_int = 128;
46482 clonei->vecsize_float = 256;
46483 break;
46484 case 'd':
46485 clonei->vecsize_int = 256;
46486 clonei->vecsize_float = 256;
46487 break;
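/* If simdlen was not specified, default it to the number of BASE_TYPE
elements that fit in the chosen vector size, capped at 16. */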
46489 if (clonei->simdlen == 0)
46491 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46492 clonei->simdlen = clonei->vecsize_int;
46493 else
46494 clonei->simdlen = clonei->vecsize_float;
46495 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46496 if (clonei->simdlen > 16)
46497 clonei->simdlen = 16;
46499 return ret;
46502 /* Add target attribute to SIMD clone NODE if needed. */
46504 static void
46505 ix86_simd_clone_adjust (struct cgraph_node *node)
46507 const char *str = NULL;
46508 gcc_assert (node->decl == cfun->decl);
46509 switch (node->simdclone->vecsize_mangle)
46511 case 'b':
46512 if (!TARGET_SSE2)
46513 str = "sse2";
46514 break;
46515 case 'c':
46516 if (!TARGET_AVX)
46517 str = "avx";
46518 break;
46519 case 'd':
46520 if (!TARGET_AVX2)
46521 str = "avx2";
46522 break;
46523 default:
46524 gcc_unreachable ();
46526 if (str == NULL)
46527 return;
46528 push_cfun (NULL);
46529 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46530 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46531 gcc_assert (ok);
46532 pop_cfun ();
46533 ix86_previous_fndecl = NULL_TREE;
46534 ix86_set_current_function (node->decl);
46537 /* If SIMD clone NODE can't be used in a vectorized loop
46538 in the current function, return -1; otherwise return the badness of
46539 using it (0 if it is most desirable from the vecsize_mangle point of
46540 view, 1 slightly less desirable, etc.). */
46542 static int
46543 ix86_simd_clone_usable (struct cgraph_node *node)
46545 switch (node->simdclone->vecsize_mangle)
46547 case 'b':
46548 if (!TARGET_SSE2)
46549 return -1;
46550 if (!TARGET_AVX)
46551 return 0;
46552 return TARGET_AVX2 ? 2 : 1;
46553 case 'c':
46554 if (!TARGET_AVX)
46555 return -1;
46556 return TARGET_AVX2 ? 1 : 0;
46557 break;
46558 case 'd':
46559 if (!TARGET_AVX2)
46560 return -1;
46561 return 0;
46562 default:
46563 gcc_unreachable ();
46567 /* Count the memory references in *X, accumulating the count in
46568 *MEM_COUNT. The count determines the unrolling factor for the
46569 bdver3 and bdver4 architectures. */
46571 static int
46572 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46574 if (*x != NULL_RTX && MEM_P (*x))
46576 enum machine_mode mode;
46577 unsigned int n_words;
46579 mode = GET_MODE (*x);
46580 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46582 if (n_words > 4)
46583 (*mem_count) += 2;
46584 else
46585 (*mem_count) += 1;
46587 return 0;
46590 /* This function adjusts the unroll factor based on
46591 hardware capabilities. For example, bdver3 has
46592 a loop buffer which makes unrolling of smaller
46593 loops less important. The unroll factor is chosen
46594 using the number of memory references in the loop
46595 body (a target of 32 is used) as a heuristic. */
46597 static unsigned
46598 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46600 basic_block *bbs;
46601 rtx insn;
46602 unsigned i;
46603 unsigned mem_count = 0;
46605 if (!TARGET_ADJUST_UNROLL)
46606 return nunroll;
46608 /* Count the number of memory references within the loop body. */
46609 bbs = get_loop_body (loop);
46610 for (i = 0; i < loop->num_nodes; i++)
46612 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46613 if (NONDEBUG_INSN_P (insn))
46614 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46616 free (bbs);
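/* Cap the unroll factor so the unrolled loop body contains at most about
32 memory references. */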
46618 if (mem_count && mem_count <= 32)
46619 return 32 / mem_count;
46621 return nunroll;
46625 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46627 static bool
46628 ix86_float_exceptions_rounding_supported_p (void)
46630 /* For x87 floating point with standard excess precision handling,
46631 there is no adddf3 pattern (since x87 floating point only has
46632 XFmode operations) so the default hook implementation gets this
46633 wrong. */
46634 return TARGET_80387 || TARGET_SSE_MATH;
46637 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
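/* The trees built here implement the hook's protocol: *HOLD saves the
x87/SSE environment and masks all exceptions, *CLEAR clears the exception
flags, and *UPDATE captures any newly raised exceptions, restores the
saved environment and re-raises them via __atomic_feraiseexcept. */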
46639 static void
46640 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46642 if (!TARGET_80387 && !TARGET_SSE_MATH)
46643 return;
46644 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46645 if (TARGET_80387)
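/* fnstenv stores the 28-byte protected-mode x87 environment; model it as
an array of seven unsigned ints. */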
46647 tree fenv_index_type = build_index_type (size_int (6));
46648 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46649 tree fenv_var = create_tmp_var (fenv_type, NULL);
46650 mark_addressable (fenv_var);
46651 tree fenv_ptr = build_pointer_type (fenv_type);
46652 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46653 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46654 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46655 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46656 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46657 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46658 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46659 tree hold_fnclex = build_call_expr (fnclex, 0);
46660 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46661 hold_fnclex);
46662 *clear = build_call_expr (fnclex, 0);
46663 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46664 mark_addressable (sw_var);
46665 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46666 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46667 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46668 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46669 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46670 exceptions_var, exceptions_x87);
46671 *update = build2 (COMPOUND_EXPR, integer_type_node,
46672 fnstsw_call, update_mod);
46673 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46674 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46676 if (TARGET_SSE_MATH)
46678 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46679 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46680 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46681 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46682 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46683 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46684 mxcsr_orig_var, stmxcsr_hold_call);
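/* In MXCSR, bits 7..12 are the exception mask bits and bits 0..5 are the
sticky exception flags: OR in 0x1f80 to mask all exceptions, then AND
with 0xffffffc0 to clear any pending flags. */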
46685 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46686 mxcsr_orig_var,
46687 build_int_cst (unsigned_type_node, 0x1f80));
46688 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46689 build_int_cst (unsigned_type_node, 0xffffffc0));
46690 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46691 mxcsr_mod_var, hold_mod_val);
46692 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46693 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46694 hold_assign_orig, hold_assign_mod);
46695 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46696 ldmxcsr_hold_call);
46697 if (*hold)
46698 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46699 else
46700 *hold = hold_all;
46701 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46702 if (*clear)
46703 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46704 ldmxcsr_clear_call);
46705 else
46706 *clear = ldmxcsr_clear_call;
46707 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46708 tree exceptions_sse = fold_convert (integer_type_node,
46709 stxmcsr_update_call);
46710 if (*update)
46712 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46713 exceptions_var, exceptions_sse);
46714 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46715 exceptions_var, exceptions_mod);
46716 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46717 exceptions_assign);
46719 else
46720 *update = build2 (MODIFY_EXPR, integer_type_node,
46721 exceptions_var, exceptions_sse);
46722 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46723 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46724 ldmxcsr_update_call);
46726 tree atomic_feraiseexcept
46727 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46728 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46729 1, exceptions_var);
46730 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46731 atomic_feraiseexcept_call);
46734 /* Initialize the GCC target structure. */
46735 #undef TARGET_RETURN_IN_MEMORY
46736 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46738 #undef TARGET_LEGITIMIZE_ADDRESS
46739 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46741 #undef TARGET_ATTRIBUTE_TABLE
46742 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46743 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46744 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46745 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46746 # undef TARGET_MERGE_DECL_ATTRIBUTES
46747 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46748 #endif
46750 #undef TARGET_COMP_TYPE_ATTRIBUTES
46751 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46753 #undef TARGET_INIT_BUILTINS
46754 #define TARGET_INIT_BUILTINS ix86_init_builtins
46755 #undef TARGET_BUILTIN_DECL
46756 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46757 #undef TARGET_EXPAND_BUILTIN
46758 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46760 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46761 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46762 ix86_builtin_vectorized_function
46764 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46765 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46767 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46768 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46770 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46771 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46773 #undef TARGET_BUILTIN_RECIPROCAL
46774 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46776 #undef TARGET_ASM_FUNCTION_EPILOGUE
46777 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46779 #undef TARGET_ENCODE_SECTION_INFO
46780 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46781 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46782 #else
46783 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46784 #endif
46786 #undef TARGET_ASM_OPEN_PAREN
46787 #define TARGET_ASM_OPEN_PAREN ""
46788 #undef TARGET_ASM_CLOSE_PAREN
46789 #define TARGET_ASM_CLOSE_PAREN ""
46791 #undef TARGET_ASM_BYTE_OP
46792 #define TARGET_ASM_BYTE_OP ASM_BYTE
46794 #undef TARGET_ASM_ALIGNED_HI_OP
46795 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46796 #undef TARGET_ASM_ALIGNED_SI_OP
46797 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46798 #ifdef ASM_QUAD
46799 #undef TARGET_ASM_ALIGNED_DI_OP
46800 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46801 #endif
46803 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46804 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46806 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46807 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46809 #undef TARGET_ASM_UNALIGNED_HI_OP
46810 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46811 #undef TARGET_ASM_UNALIGNED_SI_OP
46812 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46813 #undef TARGET_ASM_UNALIGNED_DI_OP
46814 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46816 #undef TARGET_PRINT_OPERAND
46817 #define TARGET_PRINT_OPERAND ix86_print_operand
46818 #undef TARGET_PRINT_OPERAND_ADDRESS
46819 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46820 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46821 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46822 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46823 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46825 #undef TARGET_SCHED_INIT_GLOBAL
46826 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46827 #undef TARGET_SCHED_ADJUST_COST
46828 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46829 #undef TARGET_SCHED_ISSUE_RATE
46830 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46831 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46832 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46833 ia32_multipass_dfa_lookahead
46834 #undef TARGET_SCHED_MACRO_FUSION_P
46835 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46836 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46837 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46839 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46840 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46842 #undef TARGET_MEMMODEL_CHECK
46843 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46845 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46846 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46848 #ifdef HAVE_AS_TLS
46849 #undef TARGET_HAVE_TLS
46850 #define TARGET_HAVE_TLS true
46851 #endif
46852 #undef TARGET_CANNOT_FORCE_CONST_MEM
46853 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46854 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46855 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46857 #undef TARGET_DELEGITIMIZE_ADDRESS
46858 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46860 #undef TARGET_MS_BITFIELD_LAYOUT_P
46861 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46863 #if TARGET_MACHO
46864 #undef TARGET_BINDS_LOCAL_P
46865 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46866 #endif
46867 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46868 #undef TARGET_BINDS_LOCAL_P
46869 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46870 #endif
46872 #undef TARGET_ASM_OUTPUT_MI_THUNK
46873 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46874 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46875 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46877 #undef TARGET_ASM_FILE_START
46878 #define TARGET_ASM_FILE_START x86_file_start
46880 #undef TARGET_OPTION_OVERRIDE
46881 #define TARGET_OPTION_OVERRIDE ix86_option_override
46883 #undef TARGET_REGISTER_MOVE_COST
46884 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46885 #undef TARGET_MEMORY_MOVE_COST
46886 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46887 #undef TARGET_RTX_COSTS
46888 #define TARGET_RTX_COSTS ix86_rtx_costs
46889 #undef TARGET_ADDRESS_COST
46890 #define TARGET_ADDRESS_COST ix86_address_cost
46892 #undef TARGET_FIXED_CONDITION_CODE_REGS
46893 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46894 #undef TARGET_CC_MODES_COMPATIBLE
46895 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46897 #undef TARGET_MACHINE_DEPENDENT_REORG
46898 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46900 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46901 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46903 #undef TARGET_BUILD_BUILTIN_VA_LIST
46904 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46906 #undef TARGET_FOLD_BUILTIN
46907 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46909 #undef TARGET_COMPARE_VERSION_PRIORITY
46910 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46912 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46913 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46914 ix86_generate_version_dispatcher_body
46916 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46917 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46918 ix86_get_function_versions_dispatcher
46920 #undef TARGET_ENUM_VA_LIST_P
46921 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46923 #undef TARGET_FN_ABI_VA_LIST
46924 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
46926 #undef TARGET_CANONICAL_VA_LIST_TYPE
46927 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
46929 #undef TARGET_EXPAND_BUILTIN_VA_START
46930 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
46932 #undef TARGET_MD_ASM_CLOBBERS
46933 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
46935 #undef TARGET_PROMOTE_PROTOTYPES
46936 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
46937 #undef TARGET_SETUP_INCOMING_VARARGS
46938 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
46939 #undef TARGET_MUST_PASS_IN_STACK
46940 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
46941 #undef TARGET_FUNCTION_ARG_ADVANCE
46942 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
46943 #undef TARGET_FUNCTION_ARG
46944 #define TARGET_FUNCTION_ARG ix86_function_arg
46945 #undef TARGET_FUNCTION_ARG_BOUNDARY
46946 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
46947 #undef TARGET_PASS_BY_REFERENCE
46948 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
46949 #undef TARGET_INTERNAL_ARG_POINTER
46950 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
46951 #undef TARGET_UPDATE_STACK_BOUNDARY
46952 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
46953 #undef TARGET_GET_DRAP_RTX
46954 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
46955 #undef TARGET_STRICT_ARGUMENT_NAMING
46956 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
46957 #undef TARGET_STATIC_CHAIN
46958 #define TARGET_STATIC_CHAIN ix86_static_chain
46959 #undef TARGET_TRAMPOLINE_INIT
46960 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
46961 #undef TARGET_RETURN_POPS_ARGS
46962 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
46964 #undef TARGET_LEGITIMATE_COMBINED_INSN
46965 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
46967 #undef TARGET_ASAN_SHADOW_OFFSET
46968 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
46970 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
46971 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
46973 #undef TARGET_SCALAR_MODE_SUPPORTED_P
46974 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
46976 #undef TARGET_VECTOR_MODE_SUPPORTED_P
46977 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
46979 #undef TARGET_C_MODE_FOR_SUFFIX
46980 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
46982 #ifdef HAVE_AS_TLS
46983 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
46984 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
46985 #endif
46987 #ifdef SUBTARGET_INSERT_ATTRIBUTES
46988 #undef TARGET_INSERT_ATTRIBUTES
46989 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
46990 #endif
46992 #undef TARGET_MANGLE_TYPE
46993 #define TARGET_MANGLE_TYPE ix86_mangle_type
46995 #if !TARGET_MACHO
46996 #undef TARGET_STACK_PROTECT_FAIL
46997 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
46998 #endif
47000 #undef TARGET_FUNCTION_VALUE
47001 #define TARGET_FUNCTION_VALUE ix86_function_value
47003 #undef TARGET_FUNCTION_VALUE_REGNO_P
47004 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47006 #undef TARGET_PROMOTE_FUNCTION_MODE
47007 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47009 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47010 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47012 #undef TARGET_INSTANTIATE_DECLS
47013 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47015 #undef TARGET_SECONDARY_RELOAD
47016 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47018 #undef TARGET_CLASS_MAX_NREGS
47019 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47021 #undef TARGET_PREFERRED_RELOAD_CLASS
47022 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47023 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47024 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47025 #undef TARGET_CLASS_LIKELY_SPILLED_P
47026 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47028 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47029 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47030 ix86_builtin_vectorization_cost
47031 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47032 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47033 ix86_vectorize_vec_perm_const_ok
47034 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47035 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47036 ix86_preferred_simd_mode
47037 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47038 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47039 ix86_autovectorize_vector_sizes
47040 #undef TARGET_VECTORIZE_INIT_COST
47041 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47042 #undef TARGET_VECTORIZE_ADD_STMT_COST
47043 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47044 #undef TARGET_VECTORIZE_FINISH_COST
47045 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47046 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47047 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47049 #undef TARGET_SET_CURRENT_FUNCTION
47050 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47052 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47053 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47055 #undef TARGET_OPTION_SAVE
47056 #define TARGET_OPTION_SAVE ix86_function_specific_save
47058 #undef TARGET_OPTION_RESTORE
47059 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47061 #undef TARGET_OPTION_PRINT
47062 #define TARGET_OPTION_PRINT ix86_function_specific_print
47064 #undef TARGET_OPTION_FUNCTION_VERSIONS
47065 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47067 #undef TARGET_CAN_INLINE_P
47068 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47070 #undef TARGET_EXPAND_TO_RTL_HOOK
47071 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47073 #undef TARGET_LEGITIMATE_ADDRESS_P
47074 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47076 #undef TARGET_LRA_P
47077 #define TARGET_LRA_P hook_bool_void_true
47079 #undef TARGET_REGISTER_PRIORITY
47080 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47082 #undef TARGET_REGISTER_USAGE_LEVELING_P
47083 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47085 #undef TARGET_LEGITIMATE_CONSTANT_P
47086 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47088 #undef TARGET_FRAME_POINTER_REQUIRED
47089 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47091 #undef TARGET_CAN_ELIMINATE
47092 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47094 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47095 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47097 #undef TARGET_ASM_CODE_END
47098 #define TARGET_ASM_CODE_END ix86_code_end
47100 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47101 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47103 #if TARGET_MACHO
47104 #undef TARGET_INIT_LIBFUNCS
47105 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47106 #endif
47108 #undef TARGET_LOOP_UNROLL_ADJUST
47109 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47111 #undef TARGET_SPILL_CLASS
47112 #define TARGET_SPILL_CLASS ix86_spill_class
47114 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47115 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47116 ix86_simd_clone_compute_vecsize_and_simdlen
47118 #undef TARGET_SIMD_CLONE_ADJUST
47119 #define TARGET_SIMD_CLONE_ADJUST \
47120 ix86_simd_clone_adjust
47122 #undef TARGET_SIMD_CLONE_USABLE
47123 #define TARGET_SIMD_CLONE_USABLE \
47124 ix86_simd_clone_usable
47126 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47127 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47128 ix86_float_exceptions_rounding_supported_p
47130 struct gcc_target targetm = TARGET_INITIALIZER;
47132 #include "gt-i386.h"