PR target/59794
[official-gcc.git] / gcc / config / i386 / i386.c
blob 0d30eb017b3395398eaff8b3ec7b937f3e57d666
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
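/* For example, MODE_INDEX (SImode) is 2, so the SI entry is the third
   element of each per-mode multiply and divide cost array in the
   processor_costs tables below; any mode other than QI/HI/SI/DImode
   falls into the trailing "other" slot at index 4.  */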
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
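/* Worked example: with COSTS_N_INSNS (N) defined as (N) * 4 as assumed
   above, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. a two-byte
   add is charged the same as one instruction, which keeps the size-based
   costs on the same scale as the insn-based ones.  */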
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
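/* Reading these initializers (an informal sketch): the first field is the
   algorithm used when the block size is unknown at compile time, followed
   by {max_size, algorithm, noalign} entries tried in order, where a
   max_size of -1 means "any larger size".  The two array elements appear
   to be separate tables for 32-bit and 64-bit code.  So when tuning for
   size, both memcpy and memset always use rep movsb
   (rep_prefix_1_byte).  */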
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
180 };
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
256 };
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
333 };
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
408 };
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411 (we ensure the alignment). For small blocks inline loop is still a
412 noticeable win, for bigger blocks either rep movsl or rep movsb is
413 way to go. Rep movsb has apparently more expensive startup time in CPU,
414 but after 4K the difference is down in the noise. */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
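/* Concretely, for the memcpy table above: blocks of at most 128 bytes use
   an inline loop, up to 1024 bytes an unrolled loop, up to 8192 bytes
   rep movsl, larger blocks rep movsb, and a size unknown at compile time
   uses rep movsl.  */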
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
491 };
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
566 };
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
722 /* K8 has optimized REP instruction for medium sized blocks, but for very
723 small blocks it is better to use loop. For large blocks, libcall can
724 do nontemporary accesses and beat inline considerably. */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
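/* Matching the comment above: in the first of the two tables, blocks of at
   most 6 bytes use an inline loop, up to 14 bytes an unrolled loop, and
   anything larger rep movsl; in the second, up to 16 bytes use a loop, up
   to 8192 bytes rep movsq, and larger blocks go to the libcall, which can
   use nontemporal stores.  */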
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 /* New AMD processors never drop prefetches; if they cannot be performed
781 immediately, they are queued. We set number of simultaneous prefetches
782 to a large constant to reflect this (it probably is not a good idea not
783 to limit number of prefetches at all, as their execution also takes some
784 time). */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
807 };
809 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
810 very small blocks it is better to use loop. For large blocks, libcall can
811 do nontemporary accesses and beat inline considerably. */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued. We set number of simultaneous prefetches
876 to a large constant to reflect this (it probably is not a good idea not
877 to limit number of prefetches at all, as their execution also takes some
878 time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
901 };
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporary accesses and beat inline considerably. */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969 /* New AMD processors never drop prefetches; if they cannot be performed
970 immediately, they are queued. We set number of simultaneous prefetches
971 to a large constant to reflect this (it probably is not a good idea not
972 to limit number of prefetches at all, as their execution also takes some
973 time). */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
998 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
999 very small blocks it is better to use loop. For large blocks, libcall
1000 can do nontemporary accesses and beat inline considerably. */
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065 /* New AMD processors never drop prefetches; if they cannot be performed
1066 immediately, they are queued. We set number of simultaneous prefetches
1067 to a large constant to reflect this (it probably is not a good idea not
1068 to limit number of prefetches at all, as their execution also takes some
1069 time). */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
1092 };
1095 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1096 very small blocks it is better to use loop. For large blocks, libcall
1097 can do nontemporary accesses and beat inline considerably. */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152 /* New AMD processors never drop prefetches; if they cannot be performed
1153 immediately, they are queued. We set number of simultaneous prefetches
1154 to a large constant to reflect this (it probably is not a good idea not
1155 to limit number of prefetches at all, as their execution also takes some
1156 time). */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
1179 };
1181 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1182 very small blocks it is better to use loop. For large blocks, libcall
1183 can do nontemporary accesses and beat inline considerably. */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 /* New AMD processors never drop prefetches; if they cannot be performed
1239 immediately, they are queued. We set number of simultaneous prefetches
1240 to a large constant to reflect this (it probably is not a good idea not
1241 to limit number of prefetches at all, as their execution also takes some
1242 time). */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
1265 };
1267 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1268 very small blocks it is better to use loop. For large blocks, libcall can
1269 do nontemporary accesses and beat inline considerably. */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
1354 };
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration, lea is 2 cycles or more. With
1844 this cost, however, our current implementation of synth_mult results in
1845 the use of unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea is 2 cycles or more. With
1931 this cost, however, our current implementation of synth_mult results in
1932 the use of unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME: perhaps a more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
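/* For illustration (not part of the original sources): each X86_TUNE_* entry
   in x86-tune.def supplies a selector built from the masks above (a
   hypothetical entry might use m_CORE_ALL | m_GENERIC).  set_ix86_tune_features
   below then enables a feature for the active -mtune target by testing that
   selector against (1u << ix86_tune), so tuning for PROCESSOR_HASWELL turns on
   the features whose selector includes m_HASWELL, either directly or via a
   group mask such as m_CORE_ALL, unless overridden by -mtune-ctrl.  */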
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
2084 /* In case the average insn count for a single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2089 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 /* The "default" register map used in 32bit mode. */
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2148 /* The "default" register map used in 64bit mode. */
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2232 /* Define parameter passing and return registers. */
2234 static int const x86_64_int_parameter_registers[6] =
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2241 CX_REG, DX_REG, R8_REG, R9_REG
2244 static int const x86_64_int_return_registers[4] =
2246 AX_REG, DX_REG, DI_REG, SI_REG
2249 /* Additional registers that are clobbered by SYSV calls. */
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2259 /* Define the structure for the machine field in struct function. */
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2275 saved static chain if ix86_static_chain_on_stack
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2289 [frame] |
2291 [padding2] | = to_allocate
2292 <- STACK_POINTER
2294 struct ix86_frame
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2347 /* Alignment for incoming stack boundary in bits, as specified on the
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2357 /* Calling abi specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2368 /* Register class used for passing a given 64-bit part of the argument.
2369 These represent classes as documented by the psABI, with the exception
2370 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2371 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (the upper half then contains padding). */
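/* Illustrative examples (following the psABI; not spelled out in the original
   comment): an int argument is classified as X86_64_INTEGERSI_CLASS, a long or
   a pointer as X86_64_INTEGER_CLASS, a float as X86_64_SSESF_CLASS, a double
   as X86_64_SSEDF_CLASS, and a long double occupies two eightbytes classified
   as X86_64_X87_CLASS and X86_64_X87UP_CLASS.  */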
2375 enum x86_64_reg_class
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2390 #define MAX_CLASSES 8
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2413 enum ix86_function_specific_strings
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2437 static enum calling_abi ix86_function_abi (const_tree);
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2444 /* Whether -mtune= or -march= were specified */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2496 static bool
2497 gate_insert_vzeroupper (void)
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2505 int i;
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256-bit registers. The pass
2509 reuses the mode switching infrastructure by re-running the mode
2510 insertion pass, so disable entities that have already been processed. */
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2521 namespace {
2523 const pass_data pass_data_insert_vzeroupper =
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2549 }; // class pass_insert_vzeroupper
2551 } // anon namespace
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2556 return new pass_insert_vzeroupper (ctxt);
2559 /* Return true if a red-zone is in use. */
2561 static inline bool
2562 ix86_using_red_zone (void)
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options will match those first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2627 /* Flag options. */
2628 static struct ix86_target_opts flag_opts[] =
2630 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2631 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2632 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2633 { "-m80387", MASK_80387 },
2634 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2635 { "-malign-double", MASK_ALIGN_DOUBLE },
2636 { "-mcld", MASK_CLD },
2637 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2638 { "-mieee-fp", MASK_IEEE_FP },
2639 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2640 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2641 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2642 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2643 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2644 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2645 { "-mno-red-zone", MASK_NO_RED_ZONE },
2646 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2647 { "-mrecip", MASK_RECIP },
2648 { "-mrtd", MASK_RTD },
2649 { "-msseregparm", MASK_SSEREGPARM },
2650 { "-mstack-arg-probe", MASK_STACK_PROBE },
2651 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2652 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2653 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2654 { "-mvzeroupper", MASK_VZEROUPPER },
2655 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2656 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2657 { "-mprefer-avx128", MASK_PREFER_AVX128},
2660 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2662 char isa_other[40];
2663 char target_other[40];
2664 unsigned num = 0;
2665 unsigned i, j;
2666 char *ret;
2667 char *ptr;
2668 size_t len;
2669 size_t line_len;
2670 size_t sep_len;
2671 const char *abi;
2673 memset (opts, '\0', sizeof (opts));
2675 /* Add -march= option. */
2676 if (arch)
2678 opts[num][0] = "-march=";
2679 opts[num++][1] = arch;
2682 /* Add -mtune= option. */
2683 if (tune)
2685 opts[num][0] = "-mtune=";
2686 opts[num++][1] = tune;
2689 /* Add -m32/-m64/-mx32. */
2690 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2692 if ((isa & OPTION_MASK_ABI_64) != 0)
2693 abi = "-m64";
2694 else
2695 abi = "-mx32";
2696 isa &= ~ (OPTION_MASK_ISA_64BIT
2697 | OPTION_MASK_ABI_64
2698 | OPTION_MASK_ABI_X32);
2700 else
2701 abi = "-m32";
2702 opts[num++][0] = abi;
2704 /* Pick out the options in isa options. */
2705 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2707 if ((isa & isa_opts[i].mask) != 0)
2709 opts[num++][0] = isa_opts[i].option;
2710 isa &= ~ isa_opts[i].mask;
2714 if (isa && add_nl_p)
2716 opts[num++][0] = isa_other;
2717 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2718 isa);
2721 /* Add flag options. */
2722 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2724 if ((flags & flag_opts[i].mask) != 0)
2726 opts[num++][0] = flag_opts[i].option;
2727 flags &= ~ flag_opts[i].mask;
2731 if (flags && add_nl_p)
2733 opts[num++][0] = target_other;
2734 sprintf (target_other, "(other flags: %#x)", flags);
2737 /* Add -fpmath= option. */
2738 if (fpmath)
2740 opts[num][0] = "-mfpmath=";
2741 switch ((int) fpmath)
2743 case FPMATH_387:
2744 opts[num++][1] = "387";
2745 break;
2747 case FPMATH_SSE:
2748 opts[num++][1] = "sse";
2749 break;
2751 case FPMATH_387 | FPMATH_SSE:
2752 opts[num++][1] = "sse+387";
2753 break;
2755 default:
2756 gcc_unreachable ();
2760 /* Any options? */
2761 if (num == 0)
2762 return NULL;
2764 gcc_assert (num < ARRAY_SIZE (opts));
2766 /* Size the string. */
2767 len = 0;
2768 sep_len = (add_nl_p) ? 3 : 1;
2769 for (i = 0; i < num; i++)
2771 len += sep_len;
2772 for (j = 0; j < 2; j++)
2773 if (opts[i][j])
2774 len += strlen (opts[i][j]);
2777 /* Build the string. */
2778 ret = ptr = (char *) xmalloc (len);
2779 line_len = 0;
2781 for (i = 0; i < num; i++)
2783 size_t len2[2];
2785 for (j = 0; j < 2; j++)
2786 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2788 if (i != 0)
2790 *ptr++ = ' ';
2791 line_len++;
2793 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2795 *ptr++ = '\\';
2796 *ptr++ = '\n';
2797 line_len = 0;
2801 for (j = 0; j < 2; j++)
2802 if (opts[i][j])
2804 memcpy (ptr, opts[i][j], len2[j]);
2805 ptr += len2[j];
2806 line_len += len2[j];
2810 *ptr = '\0';
2811 gcc_assert (ret + len >= ptr);
2813 return ret;
2816 /* Return true if profiling code should be emitted before the
2817 prologue, otherwise false.
2818 Note: for x86 with "hotfix" this case is sorried (rejected as unimplemented). */
2819 static bool
2820 ix86_profile_before_prologue (void)
2822 return flag_fentry != 0;
2825 /* Function that is callable from the debugger to print the current
2826 options. */
2827 void ATTRIBUTE_UNUSED
2828 ix86_debug_options (void)
2830 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2831 ix86_arch_string, ix86_tune_string,
2832 ix86_fpmath, true);
2834 if (opts)
2836 fprintf (stderr, "%s\n\n", opts);
2837 free (opts);
2839 else
2840 fputs ("<no options>\n\n", stderr);
2842 return;
2845 static const char *stringop_alg_names[] = {
2846 #define DEF_ENUM
2847 #define DEF_ALG(alg, name) #name,
2848 #include "stringop.def"
2849 #undef DEF_ENUM
2850 #undef DEF_ALG
2853 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2854 The string is of the following form (or a comma-separated list of such entries):
2856 strategy_alg:max_size:[align|noalign]
2858 where the full size range for the strategy is either [0, max_size] or
2859 [min_size, max_size], where min_size is the preceding range's
2860 max_size + 1. The last size range must have max_size == -1.
2862 Examples:
2865 -mmemcpy-strategy=libcall:-1:noalign
2867 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2871 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2873 This is to tell the compiler to use the following strategy for memset
2874 1) when the expected size is between [1, 16], use rep_8byte strategy;
2875 2) when the size is between [17, 2048], use vector_loop;
2876 3) when the size is > 2048, use libcall. */
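/* For illustration, the -mmemset-strategy example above is parsed by
   ix86_parse_stringop_strategy_string below into three size ranges, roughly:
     { max = 16,   alg = rep_8byte,   noalign = true  }
     { max = 2048, alg = vector_loop, noalign = false }
     { max = -1,   alg = libcall,     noalign = true  }
   (alg shown by its option name; the parser maps it to the corresponding
   stringop_alg value).  These entries then override the leading entries of
   the active cost table's memset algorithm list.  */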
2878 struct stringop_size_range
2880 int max;
2881 stringop_alg alg;
2882 bool noalign;
2885 static void
2886 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2888 const struct stringop_algs *default_algs;
2889 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2890 char *curr_range_str, *next_range_str;
2891 int i = 0, n = 0;
2893 if (is_memset)
2894 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2895 else
2896 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2898 curr_range_str = strategy_str;
2902 int maxs;
2903 char alg_name[128];
2904 char align[16];
2905 next_range_str = strchr (curr_range_str, ',');
2906 if (next_range_str)
2907 *next_range_str++ = '\0';
2909 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2910 alg_name, &maxs, align))
2912 error ("wrong arg %s to option %s", curr_range_str,
2913 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2914 return;
2917 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2919 error ("size ranges of option %s should be increasing",
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2924 for (i = 0; i < last_alg; i++)
2925 if (!strcmp (alg_name, stringop_alg_names[i]))
2926 break;
2928 if (i == last_alg)
2930 error ("wrong stringop strategy name %s specified for option %s",
2931 alg_name,
2932 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2933 return;
2936 input_ranges[n].max = maxs;
2937 input_ranges[n].alg = (stringop_alg) i;
2938 if (!strcmp (align, "align"))
2939 input_ranges[n].noalign = false;
2940 else if (!strcmp (align, "noalign"))
2941 input_ranges[n].noalign = true;
2942 else
2944 error ("unknown alignment %s specified for option %s",
2945 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2946 return;
2948 n++;
2949 curr_range_str = next_range_str;
2951 while (curr_range_str);
2953 if (input_ranges[n - 1].max != -1)
2955 error ("the max value for the last size range should be -1"
2956 " for option %s",
2957 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2961 if (n > MAX_STRINGOP_ALGS)
2963 error ("too many size ranges specified in option %s",
2964 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2965 return;
2968 /* Now override the default algs array. */
2969 for (i = 0; i < n; i++)
2971 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2972 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2973 = input_ranges[i].alg;
2974 *const_cast<int *>(&default_algs->size[i].noalign)
2975 = input_ranges[i].noalign;
2980 /* Parse the -mtune-ctrl= option. When DUMP is true,
2981 print the features that are explicitly set. */
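/* For example (feature names here are placeholders; the real ones come from
   x86-tune.def): -mtune-ctrl=feature_a,^feature_b would set feature_a and
   clear feature_b, since a leading '^' in a comma-separated entry requests
   clearing that feature.  */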
2983 static void
2984 parse_mtune_ctrl_str (bool dump)
2986 if (!ix86_tune_ctrl_string)
2987 return;
2989 char *next_feature_string = NULL;
2990 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2991 char *orig = curr_feature_string;
2992 int i;
2995 bool clear = false;
2997 next_feature_string = strchr (curr_feature_string, ',');
2998 if (next_feature_string)
2999 *next_feature_string++ = '\0';
3000 if (*curr_feature_string == '^')
3002 curr_feature_string++;
3003 clear = true;
3005 for (i = 0; i < X86_TUNE_LAST; i++)
3007 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3009 ix86_tune_features[i] = !clear;
3010 if (dump)
3011 fprintf (stderr, "Explicitly %s feature %s\n",
3012 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3013 break;
3016 if (i == X86_TUNE_LAST)
3017 error ("Unknown parameter to option -mtune-ctrl: %s",
3018 clear ? curr_feature_string - 1 : curr_feature_string);
3019 curr_feature_string = next_feature_string;
3021 while (curr_feature_string);
3022 free (orig);
3025 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3026 processor type. */
3028 static void
3029 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3031 unsigned int ix86_tune_mask = 1u << ix86_tune;
3032 int i;
3034 for (i = 0; i < X86_TUNE_LAST; ++i)
3036 if (ix86_tune_no_default)
3037 ix86_tune_features[i] = 0;
3038 else
3039 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3042 if (dump)
3044 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3045 for (i = 0; i < X86_TUNE_LAST; i++)
3046 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3047 ix86_tune_features[i] ? "on" : "off");
3050 parse_mtune_ctrl_str (dump);
3054 /* Override various settings based on options. If MAIN_ARGS_P, the
3055 options are from the command line, otherwise they are from
3056 attributes. */
3058 static void
3059 ix86_option_override_internal (bool main_args_p,
3060 struct gcc_options *opts,
3061 struct gcc_options *opts_set)
3063 int i;
3064 unsigned int ix86_arch_mask;
3065 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3066 const char *prefix;
3067 const char *suffix;
3068 const char *sw;
3070 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3071 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3072 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3073 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3074 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3075 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3076 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3077 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3078 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3079 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3080 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3081 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3082 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3083 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3084 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3085 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3086 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3087 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3088 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3089 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3090 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3091 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3092 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3093 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3094 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3095 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3096 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3097 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3098 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3099 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3100 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3101 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3102 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3103 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3104 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3105 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3106 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3107 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3108 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3109 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3110 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3111 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3112 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3113 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3114 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3116 #define PTA_CORE2 \
3117 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3118 | PTA_CX16 | PTA_FXSR)
3119 #define PTA_NEHALEM \
3120 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3121 #define PTA_WESTMERE \
3122 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3123 #define PTA_SANDYBRIDGE \
3124 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3125 #define PTA_IVYBRIDGE \
3126 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3127 #define PTA_HASWELL \
3128 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3129 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3130 #define PTA_BROADWELL \
3131 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3132 #define PTA_BONNELL \
3133 (PTA_CORE2 | PTA_MOVBE)
3134 #define PTA_SILVERMONT \
3135 (PTA_WESTMERE | PTA_MOVBE)
3137 /* If this reaches 64, we need to widen the struct pta flags below.  */
3139 static struct pta
3141 const char *const name; /* processor name or nickname. */
3142 const enum processor_type processor;
3143 const enum attr_cpu schedule;
3144 const unsigned HOST_WIDE_INT flags;
3146 const processor_alias_table[] =
3148 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3149 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3150 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3151 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3152 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3153 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3154 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3155 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3156 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3157 PTA_MMX | PTA_SSE | PTA_FXSR},
3158 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3159 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3160 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3161 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3162 PTA_MMX | PTA_SSE | PTA_FXSR},
3163 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3167 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3168 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3173 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3175 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3176 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3177 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3178 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3179 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3180 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3181 PTA_SANDYBRIDGE},
3182 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_IVYBRIDGE},
3186 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3189 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3190 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3191 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3192 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3193 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3194 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3195 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3196 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3197 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3198 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3199 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3200 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3201 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3202 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3203 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3207 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"x86-64", PROCESSOR_K8, CPU_K8,
3212 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3213 {"k8", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3215 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3216 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3217 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3218 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3219 {"opteron", PROCESSOR_K8, CPU_K8,
3220 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3221 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3222 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3224 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon64", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3236 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3237 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3239 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3240 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3241 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3242 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3243 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3244 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3245 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3246 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3247 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3248 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3249 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3250 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3251 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3256 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3257 | PTA_XSAVEOPT | PTA_FSGSBASE},
3258 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3259 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3260 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3261 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3262 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3263 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3264 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3265 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3267 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3268 | PTA_FXSR | PTA_XSAVE},
3269 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3273 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3274 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3276 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3277 PTA_64BIT
3278 | PTA_HLE /* flags are only used for -march switch. */ },
3281 /* -mrecip options. */
3282 static struct
3284 const char *string; /* option name */
3285 unsigned int mask; /* mask bits to set */
3287 const recip_options[] =
3289 { "all", RECIP_MASK_ALL },
3290 { "none", RECIP_MASK_NONE },
3291 { "div", RECIP_MASK_DIV },
3292 { "sqrt", RECIP_MASK_SQRT },
3293 { "vec-div", RECIP_MASK_VEC_DIV },
3294 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3297 int const pta_size = ARRAY_SIZE (processor_alias_table);
3299 /* Set up prefix/suffix so the error messages refer to either the command
3300 line argument, or the attribute(target). */
3301 if (main_args_p)
3303 prefix = "-m";
3304 suffix = "";
3305 sw = "switch";
3307 else
3309 prefix = "option(\"";
3310 suffix = "\")";
3311 sw = "attribute";
3314 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3315 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3316 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3317 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3318 #ifdef TARGET_BI_ARCH
3319 else
3321 #if TARGET_BI_ARCH == 1
3322 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3323 is on and OPTION_MASK_ABI_X32 is off. We turn off
3324 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3325 -mx32. */
3326 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3327 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3328 #else
3329 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3330 on and OPTION_MASK_ABI_64 is off. We turn off
3331 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3332 -m64. */
3333 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3335 #endif
3337 #endif
3339 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3341 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3342 OPTION_MASK_ABI_64 for TARGET_X32. */
3343 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3346 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3347 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3348 | OPTION_MASK_ABI_X32
3349 | OPTION_MASK_ABI_64);
3350 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3358 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3359 SUBTARGET_OVERRIDE_OPTIONS;
3360 #endif
3362 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3363 SUBSUBTARGET_OVERRIDE_OPTIONS;
3364 #endif
3366 /* -fPIC is the default for x86_64. */
3367 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3368 opts->x_flag_pic = 2;
3370 /* Need to check -mtune=generic first. */
3371 if (opts->x_ix86_tune_string)
3373 /* As special support for cross compilers we read -mtune=native
3374 as -mtune=generic. With native compilers we won't see the
3375 -mtune=native, as it was changed by the driver. */
3376 if (!strcmp (opts->x_ix86_tune_string, "native"))
3378 opts->x_ix86_tune_string = "generic";
3380 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3381 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3382 "%stune=k8%s or %stune=generic%s instead as appropriate",
3383 prefix, suffix, prefix, suffix, prefix, suffix);
3385 else
3387 if (opts->x_ix86_arch_string)
3388 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3389 if (!opts->x_ix86_tune_string)
3391 opts->x_ix86_tune_string
3392 = processor_target_table[TARGET_CPU_DEFAULT].name;
3393 ix86_tune_defaulted = 1;
3396 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3397 or defaulted. We need to use a sensible tune option. */
3398 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3400 opts->x_ix86_tune_string = "generic";
3404 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3405 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3407 /* rep; movq isn't available in 32-bit code. */
3408 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3409 opts->x_ix86_stringop_alg = no_stringop;
3412 if (!opts->x_ix86_arch_string)
3413 opts->x_ix86_arch_string
3414 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3415 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3416 else
3417 ix86_arch_specified = 1;
3419 if (opts_set->x_ix86_pmode)
3421 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3422 && opts->x_ix86_pmode == PMODE_SI)
3423 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_DI))
3425 error ("address mode %qs not supported in the %s bit mode",
3426 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3427 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3429 else
3430 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3431 ? PMODE_DI : PMODE_SI;
3433 if (!opts_set->x_ix86_abi)
3434 opts->x_ix86_abi = DEFAULT_ABI;
3436 /* For targets using the MS ABI, enable ms-extensions if not
3437 explicitly turned off. For a non-MS ABI we turn off this
3438 option. */
3439 if (!opts_set->x_flag_ms_extensions)
3440 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3442 if (opts_set->x_ix86_cmodel)
3444 switch (opts->x_ix86_cmodel)
3446 case CM_SMALL:
3447 case CM_SMALL_PIC:
3448 if (opts->x_flag_pic)
3449 opts->x_ix86_cmodel = CM_SMALL_PIC;
3450 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3451 error ("code model %qs not supported in the %s bit mode",
3452 "small", "32");
3453 break;
3455 case CM_MEDIUM:
3456 case CM_MEDIUM_PIC:
3457 if (opts->x_flag_pic)
3458 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3459 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3460 error ("code model %qs not supported in the %s bit mode",
3461 "medium", "32");
3462 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in x32 mode",
3464 "medium");
3465 break;
3467 case CM_LARGE:
3468 case CM_LARGE_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_LARGE_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "large", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "large");
3477 break;
3479 case CM_32:
3480 if (opts->x_flag_pic)
3481 error ("code model %s does not support PIC mode", "32");
3482 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "32", "64");
3485 break;
3487 case CM_KERNEL:
3488 if (opts->x_flag_pic)
3490 error ("code model %s does not support PIC mode", "kernel");
3491 opts->x_ix86_cmodel = CM_32;
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "kernel", "32");
3496 break;
3498 default:
3499 gcc_unreachable ();
3502 else
3504 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3505 use of rip-relative addressing. This eliminates fixups that
3506 would otherwise be needed if this object is to be placed in a
3507 DLL, and is essentially just as efficient as direct addressing. */
3508 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3509 && (TARGET_RDOS || TARGET_PECOFF))
3510 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3511 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3513 else
3514 opts->x_ix86_cmodel = CM_32;
3516 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3518 error ("-masm=intel not supported in this configuration");
3519 opts->x_ix86_asm_dialect = ASM_ATT;
3521 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3522 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3523 sorry ("%i-bit mode not compiled in",
3524 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3526 for (i = 0; i < pta_size; i++)
3527 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3529 ix86_schedule = processor_alias_table[i].schedule;
3530 ix86_arch = processor_alias_table[i].processor;
3531 /* Default cpu tuning to the architecture. */
3532 ix86_tune = ix86_arch;
3534 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3535 && !(processor_alias_table[i].flags & PTA_64BIT))
3536 error ("CPU you selected does not support x86-64 "
3537 "instruction set");
3539 if (processor_alias_table[i].flags & PTA_MMX
3540 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3541 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3542 if (processor_alias_table[i].flags & PTA_3DNOW
3543 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3544 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3545 if (processor_alias_table[i].flags & PTA_3DNOW_A
3546 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3547 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3548 if (processor_alias_table[i].flags & PTA_SSE
3549 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3550 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3551 if (processor_alias_table[i].flags & PTA_SSE2
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3554 if (processor_alias_table[i].flags & PTA_SSE3
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3557 if (processor_alias_table[i].flags & PTA_SSSE3
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3560 if (processor_alias_table[i].flags & PTA_SSE4_1
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3563 if (processor_alias_table[i].flags & PTA_SSE4_2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3566 if (processor_alias_table[i].flags & PTA_AVX
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3569 if (processor_alias_table[i].flags & PTA_AVX2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3572 if (processor_alias_table[i].flags & PTA_FMA
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3575 if (processor_alias_table[i].flags & PTA_SSE4A
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3578 if (processor_alias_table[i].flags & PTA_FMA4
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3581 if (processor_alias_table[i].flags & PTA_XOP
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3584 if (processor_alias_table[i].flags & PTA_LWP
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3587 if (processor_alias_table[i].flags & PTA_ABM
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3590 if (processor_alias_table[i].flags & PTA_BMI
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3593 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3596 if (processor_alias_table[i].flags & PTA_TBM
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3599 if (processor_alias_table[i].flags & PTA_BMI2
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3602 if (processor_alias_table[i].flags & PTA_CX16
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3605 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3608 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3609 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3612 if (processor_alias_table[i].flags & PTA_MOVBE
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3615 if (processor_alias_table[i].flags & PTA_AES
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3618 if (processor_alias_table[i].flags & PTA_SHA
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3621 if (processor_alias_table[i].flags & PTA_PCLMUL
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3624 if (processor_alias_table[i].flags & PTA_FSGSBASE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3627 if (processor_alias_table[i].flags & PTA_RDRND
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3630 if (processor_alias_table[i].flags & PTA_F16C
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3633 if (processor_alias_table[i].flags & PTA_RTM
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3636 if (processor_alias_table[i].flags & PTA_HLE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3639 if (processor_alias_table[i].flags & PTA_PRFCHW
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3642 if (processor_alias_table[i].flags & PTA_RDSEED
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3645 if (processor_alias_table[i].flags & PTA_ADX
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3648 if (processor_alias_table[i].flags & PTA_FXSR
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3651 if (processor_alias_table[i].flags & PTA_XSAVE
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3654 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3657 if (processor_alias_table[i].flags & PTA_AVX512F
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3660 if (processor_alias_table[i].flags & PTA_AVX512ER
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3663 if (processor_alias_table[i].flags & PTA_AVX512PF
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3666 if (processor_alias_table[i].flags & PTA_AVX512CD
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3669 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3670 x86_prefetch_sse = true;
3672 break;
3675 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3676 error ("generic CPU can be used only for %stune=%s %s",
3677 prefix, suffix, sw);
3678 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3679 error ("intel CPU can be used only for %stune=%s %s",
3680 prefix, suffix, sw);
3681 else if (i == pta_size)
3682 error ("bad value (%s) for %sarch=%s %s",
3683 opts->x_ix86_arch_string, prefix, suffix, sw);
3685 ix86_arch_mask = 1u << ix86_arch;
3686 for (i = 0; i < X86_ARCH_LAST; ++i)
3687 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3689 for (i = 0; i < pta_size; i++)
3690 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3692 ix86_schedule = processor_alias_table[i].schedule;
3693 ix86_tune = processor_alias_table[i].processor;
3694 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3696 if (!(processor_alias_table[i].flags & PTA_64BIT))
3698 if (ix86_tune_defaulted)
3700 opts->x_ix86_tune_string = "x86-64";
3701 for (i = 0; i < pta_size; i++)
3702 if (! strcmp (opts->x_ix86_tune_string,
3703 processor_alias_table[i].name))
3704 break;
3705 ix86_schedule = processor_alias_table[i].schedule;
3706 ix86_tune = processor_alias_table[i].processor;
3708 else
3709 error ("CPU you selected does not support x86-64 "
3710 "instruction set");
3713 /* Intel CPUs have always interpreted SSE prefetch instructions as
3714 NOPs; so, we can enable SSE prefetch instructions even when
3715 -mtune (rather than -march) points us to a processor that has them.
3716 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3717 higher processors. */
3718 if (TARGET_CMOV
3719 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3720 x86_prefetch_sse = true;
3721 break;
3724 if (ix86_tune_specified && i == pta_size)
3725 error ("bad value (%s) for %stune=%s %s",
3726 opts->x_ix86_tune_string, prefix, suffix, sw);
3728 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3730 #ifndef USE_IX86_FRAME_POINTER
3731 #define USE_IX86_FRAME_POINTER 0
3732 #endif
3734 #ifndef USE_X86_64_FRAME_POINTER
3735 #define USE_X86_64_FRAME_POINTER 0
3736 #endif
3738 /* Set the default values for switches whose default depends on TARGET_64BIT
3739 in case they weren't overwritten by command line options. */
3740 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3742 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3743 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3744 if (opts->x_flag_asynchronous_unwind_tables
3745 && !opts_set->x_flag_unwind_tables
3746 && TARGET_64BIT_MS_ABI)
3747 opts->x_flag_unwind_tables = 1;
3748 if (opts->x_flag_asynchronous_unwind_tables == 2)
3749 opts->x_flag_unwind_tables
3750 = opts->x_flag_asynchronous_unwind_tables = 1;
3751 if (opts->x_flag_pcc_struct_return == 2)
3752 opts->x_flag_pcc_struct_return = 0;
3754 else
3756 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3757 opts->x_flag_omit_frame_pointer
3758 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3759 if (opts->x_flag_asynchronous_unwind_tables == 2)
3760 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3761 if (opts->x_flag_pcc_struct_return == 2)
3762 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3765 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3766 if (opts->x_optimize_size)
3767 ix86_cost = &ix86_size_cost;
3768 else
3769 ix86_cost = ix86_tune_cost;
3771 /* Arrange to set up i386_stack_locals for all functions. */
3772 init_machine_status = ix86_init_machine_status;
3774 /* Validate -mregparm= value. */
3775 if (opts_set->x_ix86_regparm)
3777 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3778 warning (0, "-mregparm is ignored in 64-bit mode");
3779 if (opts->x_ix86_regparm > REGPARM_MAX)
3781 error ("-mregparm=%d is not between 0 and %d",
3782 opts->x_ix86_regparm, REGPARM_MAX);
3783 opts->x_ix86_regparm = 0;
3786 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3787 opts->x_ix86_regparm = REGPARM_MAX;
3789 /* Default align_* from the processor table. */
3790 if (opts->x_align_loops == 0)
3792 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3793 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3795 if (opts->x_align_jumps == 0)
3797 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3798 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3800 if (opts->x_align_functions == 0)
3802 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3805 /* Provide default for -mbranch-cost= value. */
3806 if (!opts_set->x_ix86_branch_cost)
3807 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3809 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3811 opts->x_target_flags
3812 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3814 /* Enable by default the SSE and MMX builtins. Do allow the user to
3815 explicitly disable any of these. In particular, disabling SSE and
3816 MMX for kernel code is extremely useful. */
3817 if (!ix86_arch_specified)
3818 opts->x_ix86_isa_flags
3819 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3820 | TARGET_SUBTARGET64_ISA_DEFAULT)
3821 & ~opts->x_ix86_isa_flags_explicit);
3823 if (TARGET_RTD_P (opts->x_target_flags))
3824 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3826 else
3828 opts->x_target_flags
3829 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3831 if (!ix86_arch_specified)
3832 opts->x_ix86_isa_flags
3833 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3835 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3836 when the programmer takes care to keep the stack from being destroyed. */
3837 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3838 opts->x_target_flags |= MASK_NO_RED_ZONE;
3841 /* Keep nonleaf frame pointers. */
3842 if (opts->x_flag_omit_frame_pointer)
3843 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3844 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3845 opts->x_flag_omit_frame_pointer = 1;
3847 /* If we're doing fast math, we don't care about comparison order
3848 wrt NaNs. This lets us use a shorter comparison sequence. */
3849 if (opts->x_flag_finite_math_only)
3850 opts->x_target_flags &= ~MASK_IEEE_FP;
3852 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3853 since the insns won't need emulation. */
3854 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3855 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3857 /* Likewise, if the target doesn't have a 387, or we've specified
3858 software floating point, don't use 387 inline intrinsics. */
3859 if (!TARGET_80387_P (opts->x_target_flags))
3860 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3862 /* Turn on MMX builtins for -msse. */
3863 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3864 opts->x_ix86_isa_flags
3865 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3867 /* Enable SSE prefetch. */
3868 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3869 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3870 x86_prefetch_sse = true;
3872 /* Enable prefetch{,w} instructions for -m3dnow. */
3873 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
3874 opts->x_ix86_isa_flags
3875 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3877 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3878 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3879 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_isa_flags
3881 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3883 /* Enable lzcnt instruction for -mabm. */
3884 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3885 opts->x_ix86_isa_flags
3886 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3888 /* Validate -mpreferred-stack-boundary= value or default it to
3889 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3890 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3891 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3893 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3894 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3895 int max = (TARGET_SEH ? 4 : 12);
3897 if (opts->x_ix86_preferred_stack_boundary_arg < min
3898 || opts->x_ix86_preferred_stack_boundary_arg > max)
3900 if (min == max)
3901 error ("-mpreferred-stack-boundary is not supported "
3902 "for this target");
3903 else
3904 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3905 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3907 else
3908 ix86_preferred_stack_boundary
3909 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
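/* Worked example for the conversion above (illustrative): the option value is
   the log2 of the boundary in bytes, so -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 bytes, i.e. a 128-bit boundary, assuming
   BITS_PER_UNIT is 8.  */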
3912 /* Set the default value for -mstackrealign. */
3913 if (opts->x_ix86_force_align_arg_pointer == -1)
3914 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3916 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3918 /* Validate -mincoming-stack-boundary= value or default it to
3919 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3920 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3921 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3923 if (opts->x_ix86_incoming_stack_boundary_arg
3924 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3925 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3926 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3927 opts->x_ix86_incoming_stack_boundary_arg,
3928 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3929 else
3931 ix86_user_incoming_stack_boundary
3932 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3933 ix86_incoming_stack_boundary
3934 = ix86_user_incoming_stack_boundary;
3938 /* Accept -msseregparm only if at least SSE support is enabled. */
3939 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3940 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3941 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3943 if (opts_set->x_ix86_fpmath)
3945 if (opts->x_ix86_fpmath & FPMATH_SSE)
3947 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3949 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3950 opts->x_ix86_fpmath = FPMATH_387;
3952 else if ((opts->x_ix86_fpmath & FPMATH_387)
3953 && !TARGET_80387_P (opts->x_target_flags))
3955 warning (0, "387 instruction set disabled, using SSE arithmetics");
3956 opts->x_ix86_fpmath = FPMATH_SSE;
3960 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3961 -mfpmath=387. The latter is nevertheless the default on many targets,
3962 since the extra 80-bit precision of temporaries is considered part of the ABI.
3963 Overwrite the default at least for -ffast-math.
3964 TODO: -mfpmath=both seems to produce similarly performing code with slightly
3965 smaller binaries. It is however not clear whether register allocation is
3966 ready for this setting.
3967 Also, -mfpmath=387 is overall considerably more compact (about 4-5%) than SSE
3968 codegen. We may switch to 387 with -ffast-math for size-optimized
3969 functions. */
3970 else if (fast_math_flags_set_p (&global_options)
3971 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3972 opts->x_ix86_fpmath = FPMATH_SSE;
3973 else
3974 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
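/* Illustrative consequence of the defaulting above (a sketch): compiling
   32-bit code with -march=pentium-m -ffast-math and no explicit -mfpmath=
   selects FPMATH_SSE here, because pentium-m enables SSE2 and the fast-math
   flags are set; without -ffast-math the target default is used instead.  */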
3976 /* If the i387 is disabled, then do not return values in it. */
3977 if (!TARGET_80387_P (opts->x_target_flags))
3978 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3980 /* Use external vectorized library in vectorizing intrinsics. */
3981 if (opts_set->x_ix86_veclibabi_type)
3982 switch (opts->x_ix86_veclibabi_type)
3984 case ix86_veclibabi_type_svml:
3985 ix86_veclib_handler = ix86_veclibabi_svml;
3986 break;
3988 case ix86_veclibabi_type_acml:
3989 ix86_veclib_handler = ix86_veclibabi_acml;
3990 break;
3992 default:
3993 gcc_unreachable ();
3996 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
3997 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
3998 && !opts->x_optimize_size)
3999 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4001 /* If stack probes are required, the space used for large function
4002 arguments on the stack must also be probed, so enable
4003 -maccumulate-outgoing-args so this happens in the prologue. */
4004 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4005 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4007 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4008 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4009 "for correctness", prefix, suffix);
4010 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4013 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4015 char *p;
4016 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4017 p = strchr (internal_label_prefix, 'X');
4018 internal_label_prefix_len = p - internal_label_prefix;
4019 *p = '\0';
4022 /* When no scheduling description is available, disable the scheduler pass
4023 so it won't slow down compilation and make x87 code slower. */
4024 if (!TARGET_SCHEDULE)
4025 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4028 ix86_tune_cost->simultaneous_prefetches,
4029 opts->x_param_values,
4030 opts_set->x_param_values);
4031 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4032 ix86_tune_cost->prefetch_block,
4033 opts->x_param_values,
4034 opts_set->x_param_values);
4035 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4036 ix86_tune_cost->l1_cache_size,
4037 opts->x_param_values,
4038 opts_set->x_param_values);
4039 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4040 ix86_tune_cost->l2_cache_size,
4041 opts->x_param_values,
4042 opts_set->x_param_values);
4044 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4045 if (opts->x_flag_prefetch_loop_arrays < 0
4046 && HAVE_prefetch
4047 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4048 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4049 opts->x_flag_prefetch_loop_arrays = 1;
4051 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4052 can be optimized to ap = __builtin_next_arg (0). */
4053 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4054 targetm.expand_builtin_va_start = NULL;
4056 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4058 ix86_gen_leave = gen_leave_rex64;
4059 if (Pmode == DImode)
4061 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4062 ix86_gen_tls_local_dynamic_base_64
4063 = gen_tls_local_dynamic_base_64_di;
4065 else
4067 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4068 ix86_gen_tls_local_dynamic_base_64
4069 = gen_tls_local_dynamic_base_64_si;
4072 else
4073 ix86_gen_leave = gen_leave;
4075 if (Pmode == DImode)
4077 ix86_gen_add3 = gen_adddi3;
4078 ix86_gen_sub3 = gen_subdi3;
4079 ix86_gen_sub3_carry = gen_subdi3_carry;
4080 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4081 ix86_gen_andsp = gen_anddi3;
4082 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4083 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4084 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4085 ix86_gen_monitor = gen_sse3_monitor_di;
4087 else
4089 ix86_gen_add3 = gen_addsi3;
4090 ix86_gen_sub3 = gen_subsi3;
4091 ix86_gen_sub3_carry = gen_subsi3_carry;
4092 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4093 ix86_gen_andsp = gen_andsi3;
4094 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4095 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4096 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4097 ix86_gen_monitor = gen_sse3_monitor_si;
4100 #ifdef USE_IX86_CLD
4101 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4102 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4103 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4104 #endif
4106 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4108 if (opts->x_flag_fentry > 0)
4109 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4110 "with -fpic");
4111 opts->x_flag_fentry = 0;
4113 else if (TARGET_SEH)
4115 if (opts->x_flag_fentry == 0)
4116 sorry ("-mno-fentry isn%'t compatible with SEH");
4117 opts->x_flag_fentry = 1;
4119 else if (opts->x_flag_fentry < 0)
4121 #if defined(PROFILE_BEFORE_PROLOGUE)
4122 opts->x_flag_fentry = 1;
4123 #else
4124 opts->x_flag_fentry = 0;
4125 #endif
4128 /* When not optimizing for size, enable the vzeroupper optimization for
4129 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4130 AVX unaligned load/store. */
4131 if (!opts->x_optimize_size)
4133 if (flag_expensive_optimizations
4134 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4135 opts->x_target_flags |= MASK_VZEROUPPER;
4136 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4137 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4138 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4139 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4140 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4141 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4142 /* Enable 128-bit AVX instruction generation
4143 for the auto-vectorizer. */
4144 if (TARGET_AVX128_OPTIMAL
4145 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4146 opts->x_target_flags |= MASK_PREFER_AVX128;
4149 if (opts->x_ix86_recip_name)
4151 char *p = ASTRDUP (opts->x_ix86_recip_name);
4152 char *q;
4153 unsigned int mask, i;
4154 bool invert;
4156 while ((q = strtok (p, ",")) != NULL)
4158 p = NULL;
4159 if (*q == '!')
4161 invert = true;
4162 q++;
4164 else
4165 invert = false;
4167 if (!strcmp (q, "default"))
4168 mask = RECIP_MASK_ALL;
4169 else
4171 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4172 if (!strcmp (q, recip_options[i].string))
4174 mask = recip_options[i].mask;
4175 break;
4178 if (i == ARRAY_SIZE (recip_options))
4180 error ("unknown option for -mrecip=%s", q);
4181 invert = false;
4182 mask = RECIP_MASK_NONE;
4186 opts->x_recip_mask_explicit |= mask;
4187 if (invert)
4188 opts->x_recip_mask &= ~mask;
4189 else
4190 opts->x_recip_mask |= mask;
4194 if (TARGET_RECIP_P (opts->x_target_flags))
4195 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4196 else if (opts_set->x_target_flags & MASK_RECIP)
4197 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
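/* Illustrative -mrecip usage for the parsing above (a sketch): entries are
   comma separated, a leading '!' inverts an entry, and "default" maps to
   RECIP_MASK_ALL, so

     -mrecip=all,!sqrt

   sets every bit of the reciprocal mask except the one named "sqrt".  */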
4199 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4200 for 64-bit Bionic. */
4201 if (TARGET_HAS_BIONIC
4202 && !(opts_set->x_target_flags
4203 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4204 opts->x_target_flags |= (TARGET_64BIT
4205 ? MASK_LONG_DOUBLE_128
4206 : MASK_LONG_DOUBLE_64);
4208 /* Only one of them can be active. */
4209 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4210 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4212 /* Save the initial options in case the user does function specific
4213 options. */
4214 if (main_args_p)
4215 target_option_default_node = target_option_current_node
4216 = build_target_option_node (opts);
4218 /* Handle stack protector */
4219 if (!opts_set->x_ix86_stack_protector_guard)
4220 opts->x_ix86_stack_protector_guard
4221 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4223 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4224 if (opts->x_ix86_tune_memcpy_strategy)
4226 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4227 ix86_parse_stringop_strategy_string (str, false);
4228 free (str);
4231 if (opts->x_ix86_tune_memset_strategy)
4233 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4234 ix86_parse_stringop_strategy_string (str, true);
4235 free (str);
4239 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4241 static void
4242 ix86_option_override (void)
4244 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4245 static struct register_pass_info insert_vzeroupper_info
4246 = { pass_insert_vzeroupper, "reload",
4247 1, PASS_POS_INSERT_AFTER
4250 ix86_option_override_internal (true, &global_options, &global_options_set);
4253 /* This needs to be done at start up. It's convenient to do it here. */
4254 register_pass (&insert_vzeroupper_info);
4257 /* Update register usage after having seen the compiler flags. */
4259 static void
4260 ix86_conditional_register_usage (void)
4262 int i, c_mask;
4263 unsigned int j;
4265 /* The PIC register, if it exists, is fixed. */
4266 j = PIC_OFFSET_TABLE_REGNUM;
4267 if (j != INVALID_REGNUM)
4268 fixed_regs[j] = call_used_regs[j] = 1;
4270 /* For 32-bit targets, squash the REX registers. */
4271 if (! TARGET_64BIT)
4273 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4274 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4275 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4276 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4277 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4278 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4281 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4282 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4283 : TARGET_64BIT ? (1 << 2)
4284 : (1 << 1));
4286 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4288 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4290 /* Set/reset conditionally defined registers from
4291 CALL_USED_REGISTERS initializer. */
4292 if (call_used_regs[i] > 1)
4293 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4295 /* Calculate the CLOBBERED_REGS register set as the call-used
4296 registers from the GENERAL_REGS register set. */
4297 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4298 && call_used_regs[i])
4299 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4302 /* If MMX is disabled, squash the registers. */
4303 if (! TARGET_MMX)
4304 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4305 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4306 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4308 /* If SSE is disabled, squash the registers. */
4309 if (! TARGET_SSE)
4310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4311 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4312 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 /* If the FPU is disabled, squash the registers. */
4315 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4320 /* If AVX512F is disabled, squash the registers. */
4321 if (! TARGET_AVX512F)
4323 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4324 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4327 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332 /* Save the current options */
4334 static void
4335 ix86_function_specific_save (struct cl_target_option *ptr,
4336 struct gcc_options *opts)
4338 ptr->arch = ix86_arch;
4339 ptr->schedule = ix86_schedule;
4340 ptr->tune = ix86_tune;
4341 ptr->branch_cost = ix86_branch_cost;
4342 ptr->tune_defaulted = ix86_tune_defaulted;
4343 ptr->arch_specified = ix86_arch_specified;
4344 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4345 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4346 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4347 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4348 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4349 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4350 ptr->x_ix86_abi = opts->x_ix86_abi;
4351 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4352 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4353 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4354 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4355 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4356 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4357 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4358 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4359 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4360 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4361 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4362 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4363 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4364 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4365 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4366 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4367 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4368 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4369 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4370 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4372 /* The fields are char but the variables are not; make sure the
4373 values fit in the fields. */
4374 gcc_assert (ptr->arch == ix86_arch);
4375 gcc_assert (ptr->schedule == ix86_schedule);
4376 gcc_assert (ptr->tune == ix86_tune);
4377 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4380 /* Restore the current options */
4382 static void
4383 ix86_function_specific_restore (struct gcc_options *opts,
4384 struct cl_target_option *ptr)
4386 enum processor_type old_tune = ix86_tune;
4387 enum processor_type old_arch = ix86_arch;
4388 unsigned int ix86_arch_mask;
4389 int i;
4391 /* We don't change -fPIC. */
4392 opts->x_flag_pic = flag_pic;
4394 ix86_arch = (enum processor_type) ptr->arch;
4395 ix86_schedule = (enum attr_cpu) ptr->schedule;
4396 ix86_tune = (enum processor_type) ptr->tune;
4397 opts->x_ix86_branch_cost = ptr->branch_cost;
4398 ix86_tune_defaulted = ptr->tune_defaulted;
4399 ix86_arch_specified = ptr->arch_specified;
4400 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4401 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4402 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4403 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4404 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4405 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4406 opts->x_ix86_abi = ptr->x_ix86_abi;
4407 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4408 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4409 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4410 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4411 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4412 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4413 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4414 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4415 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4416 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4417 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4418 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4419 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4420 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4421 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4422 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4423 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4424 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4425 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4426 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4428 /* Recreate the arch feature tests if the arch changed */
4429 if (old_arch != ix86_arch)
4431 ix86_arch_mask = 1u << ix86_arch;
4432 for (i = 0; i < X86_ARCH_LAST; ++i)
4433 ix86_arch_features[i]
4434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4437 /* Recreate the tune optimization tests */
4438 if (old_tune != ix86_tune)
4439 set_ix86_tune_features (ix86_tune, false);
4442 /* Print the current options */
4444 static void
4445 ix86_function_specific_print (FILE *file, int indent,
4446 struct cl_target_option *ptr)
4448 char *target_string
4449 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4450 NULL, NULL, ptr->x_ix86_fpmath, false);
4452 gcc_assert (ptr->arch < PROCESSOR_max);
4453 fprintf (file, "%*sarch = %d (%s)\n",
4454 indent, "",
4455 ptr->arch, processor_target_table[ptr->arch].name);
4457 gcc_assert (ptr->tune < PROCESSOR_max);
4458 fprintf (file, "%*stune = %d (%s)\n",
4459 indent, "",
4460 ptr->tune, processor_target_table[ptr->tune].name);
4462 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4464 if (target_string)
4466 fprintf (file, "%*s%s\n", indent, "", target_string);
4467 free (target_string);
4472 /* Inner function to process the attribute ((target (...))); take an argument
4473 and set the current options from it. If we have a list, recursively go
4474 over the list. */
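/* Illustrative use of the attribute processed below (a sketch; the ISA names
   and the "arch="/"fpmath=" prefixes are taken from the attrs[] table in this
   function, and "core-avx2" from processor_alias_table):

     __attribute__ ((target ("avx2,fpmath=sse,arch=core-avx2")))
     void foo (void);  */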
4476 static bool
4477 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4478 struct gcc_options *opts,
4479 struct gcc_options *opts_set,
4480 struct gcc_options *enum_opts_set)
4482 char *next_optstr;
4483 bool ret = true;
4485 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4486 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4487 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4488 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4489 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4491 enum ix86_opt_type
4493 ix86_opt_unknown,
4494 ix86_opt_yes,
4495 ix86_opt_no,
4496 ix86_opt_str,
4497 ix86_opt_enum,
4498 ix86_opt_isa
4501 static const struct
4503 const char *string;
4504 size_t len;
4505 enum ix86_opt_type type;
4506 int opt;
4507 int mask;
4508 } attrs[] = {
4509 /* isa options */
4510 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4511 IX86_ATTR_ISA ("abm", OPT_mabm),
4512 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4513 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4514 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4515 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4516 IX86_ATTR_ISA ("aes", OPT_maes),
4517 IX86_ATTR_ISA ("sha", OPT_msha),
4518 IX86_ATTR_ISA ("avx", OPT_mavx),
4519 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4520 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4521 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4522 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4523 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4524 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4525 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4526 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4527 IX86_ATTR_ISA ("sse", OPT_msse),
4528 IX86_ATTR_ISA ("sse2", OPT_msse2),
4529 IX86_ATTR_ISA ("sse3", OPT_msse3),
4530 IX86_ATTR_ISA ("sse4", OPT_msse4),
4531 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4532 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4533 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4534 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4535 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4536 IX86_ATTR_ISA ("fma", OPT_mfma),
4537 IX86_ATTR_ISA ("xop", OPT_mxop),
4538 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4539 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4540 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4541 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4542 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4543 IX86_ATTR_ISA ("hle", OPT_mhle),
4544 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4545 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4546 IX86_ATTR_ISA ("adx", OPT_madx),
4547 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4548 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4549 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4551 /* enum options */
4552 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4554 /* string options */
4555 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4556 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4558 /* flag options */
4559 IX86_ATTR_YES ("cld",
4560 OPT_mcld,
4561 MASK_CLD),
4563 IX86_ATTR_NO ("fancy-math-387",
4564 OPT_mfancy_math_387,
4565 MASK_NO_FANCY_MATH_387),
4567 IX86_ATTR_YES ("ieee-fp",
4568 OPT_mieee_fp,
4569 MASK_IEEE_FP),
4571 IX86_ATTR_YES ("inline-all-stringops",
4572 OPT_minline_all_stringops,
4573 MASK_INLINE_ALL_STRINGOPS),
4575 IX86_ATTR_YES ("inline-stringops-dynamically",
4576 OPT_minline_stringops_dynamically,
4577 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4579 IX86_ATTR_NO ("align-stringops",
4580 OPT_mno_align_stringops,
4581 MASK_NO_ALIGN_STRINGOPS),
4583 IX86_ATTR_YES ("recip",
4584 OPT_mrecip,
4585 MASK_RECIP),
4589 /* If this is a list, recurse to get the options. */
4590 if (TREE_CODE (args) == TREE_LIST)
4592 bool ret = true;
4594 for (; args; args = TREE_CHAIN (args))
4595 if (TREE_VALUE (args)
4596 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4597 p_strings, opts, opts_set,
4598 enum_opts_set))
4599 ret = false;
4601 return ret;
4604 else if (TREE_CODE (args) != STRING_CST)
4606 error ("attribute %<target%> argument not a string");
4607 return false;
4610 /* Handle multiple arguments separated by commas. */
4611 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4613 while (next_optstr && *next_optstr != '\0')
4615 char *p = next_optstr;
4616 char *orig_p = p;
4617 char *comma = strchr (next_optstr, ',');
4618 const char *opt_string;
4619 size_t len, opt_len;
4620 int opt;
4621 bool opt_set_p;
4622 char ch;
4623 unsigned i;
4624 enum ix86_opt_type type = ix86_opt_unknown;
4625 int mask = 0;
4627 if (comma)
4629 *comma = '\0';
4630 len = comma - next_optstr;
4631 next_optstr = comma + 1;
4633 else
4635 len = strlen (p);
4636 next_optstr = NULL;
4639 /* Recognize no-xxx. */
4640 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4642 opt_set_p = false;
4643 p += 3;
4644 len -= 3;
4646 else
4647 opt_set_p = true;
4649 /* Find the option. */
4650 ch = *p;
4651 opt = N_OPTS;
4652 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4654 type = attrs[i].type;
4655 opt_len = attrs[i].len;
4656 if (ch == attrs[i].string[0]
4657 && ((type != ix86_opt_str && type != ix86_opt_enum)
4658 ? len == opt_len
4659 : len > opt_len)
4660 && memcmp (p, attrs[i].string, opt_len) == 0)
4662 opt = attrs[i].opt;
4663 mask = attrs[i].mask;
4664 opt_string = attrs[i].string;
4665 break;
4669 /* Process the option. */
4670 if (opt == N_OPTS)
4672 error ("attribute(target(\"%s\")) is unknown", orig_p);
4673 ret = false;
4676 else if (type == ix86_opt_isa)
4678 struct cl_decoded_option decoded;
4680 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4681 ix86_handle_option (opts, opts_set,
4682 &decoded, input_location);
4685 else if (type == ix86_opt_yes || type == ix86_opt_no)
4687 if (type == ix86_opt_no)
4688 opt_set_p = !opt_set_p;
4690 if (opt_set_p)
4691 opts->x_target_flags |= mask;
4692 else
4693 opts->x_target_flags &= ~mask;
4696 else if (type == ix86_opt_str)
4698 if (p_strings[opt])
4700 error ("option(\"%s\") was already specified", opt_string);
4701 ret = false;
4703 else
4704 p_strings[opt] = xstrdup (p + opt_len);
4707 else if (type == ix86_opt_enum)
4709 bool arg_ok;
4710 int value;
4712 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4713 if (arg_ok)
4714 set_option (opts, enum_opts_set, opt, value,
4715 p + opt_len, DK_UNSPECIFIED, input_location,
4716 global_dc);
4717 else
4719 error ("attribute(target(\"%s\")) is unknown", orig_p);
4720 ret = false;
4724 else
4725 gcc_unreachable ();
4728 return ret;
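/* Illustrative example (not part of the original source): the parser above
   handles comma-separated ISA, enum and string options, each optionally
   prefixed with "no-", e.g.

     __attribute__((target("sse4.2,no-avx,fpmath=sse,arch=core2")))
     int foo (int x) { return x + 1; }

   Every item is matched against the attrs[] table; anything unrecognized is
   rejected with the "is unknown" error above.  */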
4731 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4733 tree
4734 ix86_valid_target_attribute_tree (tree args,
4735 struct gcc_options *opts,
4736 struct gcc_options *opts_set)
4738 const char *orig_arch_string = opts->x_ix86_arch_string;
4739 const char *orig_tune_string = opts->x_ix86_tune_string;
4740 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4741 int orig_tune_defaulted = ix86_tune_defaulted;
4742 int orig_arch_specified = ix86_arch_specified;
4743 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4744 tree t = NULL_TREE;
4745 int i;
4746 struct cl_target_option *def
4747 = TREE_TARGET_OPTION (target_option_default_node);
4748 struct gcc_options enum_opts_set;
4750 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4752 /* Process each of the options on the chain. */
4753 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4754 opts_set, &enum_opts_set))
4755 return error_mark_node;
4757 /* If the changed options are different from the default, rerun
4758 ix86_option_override_internal, and then save the options away.
4759 The string options are attribute options, and will be undone
4760 when we copy the save structure. */
4761 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4762 || opts->x_target_flags != def->x_target_flags
4763 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4764 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4765 || enum_opts_set.x_ix86_fpmath)
4767 /* If we are using the default tune= or arch=, undo the string assigned,
4768 and use the default. */
4769 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4770 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4771 else if (!orig_arch_specified)
4772 opts->x_ix86_arch_string = NULL;
4774 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4775 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4776 else if (orig_tune_defaulted)
4777 opts->x_ix86_tune_string = NULL;
4779 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4780 if (enum_opts_set.x_ix86_fpmath)
4781 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4782 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4783 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4785 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4786 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4789 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4790 ix86_option_override_internal (false, opts, opts_set);
4792 /* Add any builtin functions with the new isa if any. */
4793 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4795 /* Save the current options unless we are validating options for
4796 #pragma. */
4797 t = build_target_option_node (opts);
4799 opts->x_ix86_arch_string = orig_arch_string;
4800 opts->x_ix86_tune_string = orig_tune_string;
4801 opts_set->x_ix86_fpmath = orig_fpmath_set;
4803 /* Free up memory allocated to hold the strings */
4804 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4805 free (option_strings[i]);
4808 return t;
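/* Illustrative summary (not part of the original source): the routine above
   roughly performs

     ix86_valid_target_attribute_inner_p   -> parse the attribute string
     ix86_option_override_internal         -> apply arch=/tune= overrides
     ix86_add_new_builtins                 -> enable builtins for the new ISA
     build_target_option_node              -> snapshot the options

   and then restores the original arch/tune/fpmath settings so a per-function
   attribute does not leak into the global options.  */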
4811 /* Hook to validate attribute((target("string"))). */
4813 static bool
4814 ix86_valid_target_attribute_p (tree fndecl,
4815 tree ARG_UNUSED (name),
4816 tree args,
4817 int ARG_UNUSED (flags))
4819 struct gcc_options func_options;
4820 tree new_target, new_optimize;
4821 bool ret = true;
4823 /* attribute((target("default"))) does nothing, beyond
4824 affecting multi-versioning. */
4825 if (TREE_VALUE (args)
4826 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4827 && TREE_CHAIN (args) == NULL_TREE
4828 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4829 return true;
4831 tree old_optimize = build_optimization_node (&global_options);
4833 /* Get the optimization options of the current function. */
4834 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4836 if (!func_optimize)
4837 func_optimize = old_optimize;
4839 /* Init func_options. */
4840 memset (&func_options, 0, sizeof (func_options));
4841 init_options_struct (&func_options, NULL);
4842 lang_hooks.init_options_struct (&func_options);
4844 cl_optimization_restore (&func_options,
4845 TREE_OPTIMIZATION (func_optimize));
4847 /* Initialize func_options to the default before its target options can
4848 be set. */
4849 cl_target_option_restore (&func_options,
4850 TREE_TARGET_OPTION (target_option_default_node));
4852 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4853 &global_options_set);
4855 new_optimize = build_optimization_node (&func_options);
4857 if (new_target == error_mark_node)
4858 ret = false;
4860 else if (fndecl && new_target)
4862 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4864 if (old_optimize != new_optimize)
4865 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4868 return ret;
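/* Illustrative example (not part of the original source): the "default" case
   handled above is what C++ function multi-versioning relies on, e.g.

     __attribute__((target("default"))) int dispatchable (void);
     __attribute__((target("avx2")))    int dispatchable (void);

   The "default" version keeps the command-line options unchanged, while the
   "avx2" version gets its own target option node.  */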
4872 /* Hook to determine if one function can safely inline another. */
4874 static bool
4875 ix86_can_inline_p (tree caller, tree callee)
4877 bool ret = false;
4878 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4879 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4881 /* If callee has no option attributes, then it is ok to inline. */
4882 if (!callee_tree)
4883 ret = true;
4885 /* If caller has no option attributes, but callee does then it is not ok to
4886 inline. */
4887 else if (!caller_tree)
4888 ret = false;
4890 else
4892 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4893 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4895 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4896 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4897 function. */
4898 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4899 != callee_opts->x_ix86_isa_flags)
4900 ret = false;
4902 /* See if we have the same non-isa options. */
4903 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4904 ret = false;
4906 /* See if arch, tune, etc. are the same. */
4907 else if (caller_opts->arch != callee_opts->arch)
4908 ret = false;
4910 else if (caller_opts->tune != callee_opts->tune)
4911 ret = false;
4913 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4914 ret = false;
4916 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4917 ret = false;
4919 else
4920 ret = true;
4923 return ret;
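/* Illustrative example (not part of the original source): under the subset
   rule above, a caller built for a larger ISA may inline a callee built for a
   smaller one, but not the other way around:

     __attribute__((target("sse2"))) static inline int f (int x) { return x; }
     __attribute__((target("avx2"))) int g (int x) { return f (x); }

   Here g can inline f, while an avx2 callee could not be inlined into an
   sse2-only caller.  */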
4927 /* Remember the last target of ix86_set_current_function. */
4928 static GTY(()) tree ix86_previous_fndecl;
4930 /* Invalidate ix86_previous_fndecl cache. */
4931 void
4932 ix86_reset_previous_fndecl (void)
4934 ix86_previous_fndecl = NULL_TREE;
4937 /* Establish appropriate back-end context for processing the function
4938 FNDECL. The argument might be NULL to indicate processing at top
4939 level, outside of any function scope. */
4940 static void
4941 ix86_set_current_function (tree fndecl)
4943 /* Only change the context if the function changes. This hook is called
4944 several times in the course of compiling a function, and we don't want to
4945 slow things down too much or call target_reinit when it isn't safe. */
4946 if (fndecl && fndecl != ix86_previous_fndecl)
4948 tree old_tree = (ix86_previous_fndecl
4949 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4950 : NULL_TREE);
4952 tree new_tree = (fndecl
4953 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4954 : NULL_TREE);
4956 ix86_previous_fndecl = fndecl;
4957 if (old_tree == new_tree)
4960 else if (new_tree)
4962 cl_target_option_restore (&global_options,
4963 TREE_TARGET_OPTION (new_tree));
4964 if (TREE_TARGET_GLOBALS (new_tree))
4965 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4966 else
4967 TREE_TARGET_GLOBALS (new_tree)
4968 = save_target_globals_default_opts ();
4971 else if (old_tree)
4973 new_tree = target_option_current_node;
4974 cl_target_option_restore (&global_options,
4975 TREE_TARGET_OPTION (new_tree));
4976 if (TREE_TARGET_GLOBALS (new_tree))
4977 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4978 else if (new_tree == target_option_default_node)
4979 restore_target_globals (&default_target_globals);
4980 else
4981 TREE_TARGET_GLOBALS (new_tree)
4982 = save_target_globals_default_opts ();
4988 /* Return true if this goes in large data/bss. */
4990 static bool
4991 ix86_in_large_data_p (tree exp)
4993 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4994 return false;
4996 /* Functions are never large data. */
4997 if (TREE_CODE (exp) == FUNCTION_DECL)
4998 return false;
5000 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5002 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5003 if (strcmp (section, ".ldata") == 0
5004 || strcmp (section, ".lbss") == 0)
5005 return true;
5006 return false;
5008 else
5010 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5012 /* If this is an incomplete type with size 0, then we can't put it
5013 in data because it might be too big when completed. */
5014 if (!size || size > ix86_section_threshold)
5015 return true;
5018 return false;
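/* Illustrative example (not part of the original source): with
   -mcmodel=medium and the default -mlarge-data-threshold, a definition such
   as

     static char big_buffer[1 << 20];

   exceeds ix86_section_threshold and is therefore treated as large data,
   ending up in .lbss instead of the ordinary .bss section.  */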
5021 /* Switch to the appropriate section for output of DECL.
5022 DECL is either a `VAR_DECL' node or a constant of some sort.
5023 RELOC indicates whether forming the initial value of DECL requires
5024 link-time relocations. */
5026 ATTRIBUTE_UNUSED static section *
5027 x86_64_elf_select_section (tree decl, int reloc,
5028 unsigned HOST_WIDE_INT align)
5030 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5031 && ix86_in_large_data_p (decl))
5033 const char *sname = NULL;
5034 unsigned int flags = SECTION_WRITE;
5035 switch (categorize_decl_for_section (decl, reloc))
5037 case SECCAT_DATA:
5038 sname = ".ldata";
5039 break;
5040 case SECCAT_DATA_REL:
5041 sname = ".ldata.rel";
5042 break;
5043 case SECCAT_DATA_REL_LOCAL:
5044 sname = ".ldata.rel.local";
5045 break;
5046 case SECCAT_DATA_REL_RO:
5047 sname = ".ldata.rel.ro";
5048 break;
5049 case SECCAT_DATA_REL_RO_LOCAL:
5050 sname = ".ldata.rel.ro.local";
5051 break;
5052 case SECCAT_BSS:
5053 sname = ".lbss";
5054 flags |= SECTION_BSS;
5055 break;
5056 case SECCAT_RODATA:
5057 case SECCAT_RODATA_MERGE_STR:
5058 case SECCAT_RODATA_MERGE_STR_INIT:
5059 case SECCAT_RODATA_MERGE_CONST:
5060 sname = ".lrodata";
5061 flags = 0;
5062 break;
5063 case SECCAT_SRODATA:
5064 case SECCAT_SDATA:
5065 case SECCAT_SBSS:
5066 gcc_unreachable ();
5067 case SECCAT_TEXT:
5068 case SECCAT_TDATA:
5069 case SECCAT_TBSS:
5070 /* We don't split these for medium model. Place them into
5071 default sections and hope for the best. */
5072 break;
5074 if (sname)
5076 /* We might get called with string constants, but get_named_section
5077 doesn't like them as they are not DECLs. Also, we need to set
5078 flags in that case. */
5079 if (!DECL_P (decl))
5080 return get_section (sname, flags, NULL);
5081 return get_named_section (decl, sname, reloc);
5084 return default_elf_select_section (decl, reloc, align);
5087 /* Select a set of attributes for section NAME based on the properties
5088 of DECL and whether or not RELOC indicates that DECL's initializer
5089 might contain runtime relocations. */
5091 static unsigned int ATTRIBUTE_UNUSED
5092 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5094 unsigned int flags = default_section_type_flags (decl, name, reloc);
5096 if (decl == NULL_TREE
5097 && (strcmp (name, ".ldata.rel.ro") == 0
5098 || strcmp (name, ".ldata.rel.ro.local") == 0))
5099 flags |= SECTION_RELRO;
5101 if (strcmp (name, ".lbss") == 0
5102 || strncmp (name, ".lbss.", 5) == 0
5103 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5104 flags |= SECTION_BSS;
5106 return flags;
5109 /* Build up a unique section name, expressed as a
5110 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5111 RELOC indicates whether the initial value of EXP requires
5112 link-time relocations. */
5114 static void ATTRIBUTE_UNUSED
5115 x86_64_elf_unique_section (tree decl, int reloc)
5117 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5118 && ix86_in_large_data_p (decl))
5120 const char *prefix = NULL;
5121 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5122 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5124 switch (categorize_decl_for_section (decl, reloc))
5126 case SECCAT_DATA:
5127 case SECCAT_DATA_REL:
5128 case SECCAT_DATA_REL_LOCAL:
5129 case SECCAT_DATA_REL_RO:
5130 case SECCAT_DATA_REL_RO_LOCAL:
5131 prefix = one_only ? ".ld" : ".ldata";
5132 break;
5133 case SECCAT_BSS:
5134 prefix = one_only ? ".lb" : ".lbss";
5135 break;
5136 case SECCAT_RODATA:
5137 case SECCAT_RODATA_MERGE_STR:
5138 case SECCAT_RODATA_MERGE_STR_INIT:
5139 case SECCAT_RODATA_MERGE_CONST:
5140 prefix = one_only ? ".lr" : ".lrodata";
5141 break;
5142 case SECCAT_SRODATA:
5143 case SECCAT_SDATA:
5144 case SECCAT_SBSS:
5145 gcc_unreachable ();
5146 case SECCAT_TEXT:
5147 case SECCAT_TDATA:
5148 case SECCAT_TBSS:
5149 /* We don't split these for medium model. Place them into
5150 default sections and hope for the best. */
5151 break;
5153 if (prefix)
5155 const char *name, *linkonce;
5156 char *string;
5158 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5159 name = targetm.strip_name_encoding (name);
5161 /* If we're using one_only, then there needs to be a .gnu.linkonce
5162 prefix to the section name. */
5163 linkonce = one_only ? ".gnu.linkonce" : "";
5165 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5167 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5168 return;
5171 default_unique_section (decl, reloc);
5174 #ifdef COMMON_ASM_OP
5175 /* This says how to output assembler code to declare an
5176 uninitialized external linkage data object.
5178 For medium model x86-64 we need to use the .largecomm directive for
5179 large objects. */
5180 void
5181 x86_elf_aligned_common (FILE *file,
5182 const char *name, unsigned HOST_WIDE_INT size,
5183 int align)
5185 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5186 && size > (unsigned int)ix86_section_threshold)
5187 fputs (".largecomm\t", file);
5188 else
5189 fputs (COMMON_ASM_OP, file);
5190 assemble_name (file, name);
5191 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5192 size, align / BITS_PER_UNIT);
5194 #endif
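/* Illustrative example (not part of the original source): for a large common
   symbol under the medium code model, the routine above emits something like

     .largecomm  big_buffer,1048576,32

   whereas objects below the threshold keep using the ordinary COMMON_ASM_OP
   form.  */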
5196 /* Utility function for targets to use in implementing
5197 ASM_OUTPUT_ALIGNED_BSS. */
5199 void
5200 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5201 const char *name, unsigned HOST_WIDE_INT size,
5202 int align)
5204 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5205 && size > (unsigned int)ix86_section_threshold)
5206 switch_to_section (get_named_section (decl, ".lbss", 0));
5207 else
5208 switch_to_section (bss_section);
5209 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5210 #ifdef ASM_DECLARE_OBJECT_NAME
5211 last_assemble_variable_decl = decl;
5212 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5213 #else
5214 /* Standard thing is just output label for the object. */
5215 ASM_OUTPUT_LABEL (file, name);
5216 #endif /* ASM_DECLARE_OBJECT_NAME */
5217 ASM_OUTPUT_SKIP (file, size ? size : 1);
5220 /* Decide whether we must probe the stack before any space allocation
5221 on this target. It's essentially TARGET_STACK_PROBE except when
5222 -fstack-check causes the stack to be already probed differently. */
5224 bool
5225 ix86_target_stack_probe (void)
5227 /* Do not probe the stack twice if static stack checking is enabled. */
5228 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5229 return false;
5231 return TARGET_STACK_PROBE;
5234 /* Decide whether we can make a sibling call to a function. DECL is the
5235 declaration of the function being targeted by the call and EXP is the
5236 CALL_EXPR representing the call. */
5238 static bool
5239 ix86_function_ok_for_sibcall (tree decl, tree exp)
5241 tree type, decl_or_type;
5242 rtx a, b;
5244 /* If we are generating position-independent code, we cannot sibcall
5245 optimize any indirect call, or a direct call to a global function,
5246 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5247 if (!TARGET_MACHO
5248 && !TARGET_64BIT
5249 && flag_pic
5250 && (!decl || !targetm.binds_local_p (decl)))
5251 return false;
5253 /* If we need to align the outgoing stack, then sibcalling would
5254 unalign the stack, which may break the called function. */
5255 if (ix86_minimum_incoming_stack_boundary (true)
5256 < PREFERRED_STACK_BOUNDARY)
5257 return false;
5259 if (decl)
5261 decl_or_type = decl;
5262 type = TREE_TYPE (decl);
5264 else
5266 /* We're looking at the CALL_EXPR, we need the type of the function. */
5267 type = CALL_EXPR_FN (exp); /* pointer expression */
5268 type = TREE_TYPE (type); /* pointer type */
5269 type = TREE_TYPE (type); /* function type */
5270 decl_or_type = type;
5273 /* Check that the return value locations are the same. Like
5274 if we are returning floats on the 80387 register stack, we cannot
5275 make a sibcall from a function that doesn't return a float to a
5276 function that does or, conversely, from a function that does return
5277 a float to a function that doesn't; the necessary stack adjustment
5278 would not be executed. This is also the place we notice
5279 differences in the return value ABI. Note that it is ok for one
5280 of the functions to have void return type as long as the return
5281 value of the other is passed in a register. */
5282 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5283 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5284 cfun->decl, false);
5285 if (STACK_REG_P (a) || STACK_REG_P (b))
5287 if (!rtx_equal_p (a, b))
5288 return false;
5290 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5292 else if (!rtx_equal_p (a, b))
5293 return false;
5295 if (TARGET_64BIT)
5297 /* The SYSV ABI has more call-clobbered registers;
5298 disallow sibcalls from MS to SYSV. */
5299 if (cfun->machine->call_abi == MS_ABI
5300 && ix86_function_type_abi (type) == SYSV_ABI)
5301 return false;
5303 else
5305 /* If this call is indirect, we'll need to be able to use a
5306 call-clobbered register for the address of the target function.
5307 Make sure that all such registers are not used for passing
5308 parameters. Note that DLLIMPORT functions are indirect. */
5309 if (!decl
5310 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5312 if (ix86_function_regparm (type, NULL) >= 3)
5314 /* ??? Need to count the actual number of registers to be used,
5315 not the possible number of registers. Fix later. */
5316 return false;
5321 /* Otherwise okay. That also includes certain types of indirect calls. */
5322 return true;
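/* Illustrative example (not part of the original source): a tail call such as

     int callee (int);
     int caller (int x) { return callee (x + 1); }

   may be emitted as a plain "jmp callee" only when the checks above pass;
   e.g. a 32-bit PIC call to a non-local function is rejected because the PLT
   needs %ebx to stay live.  */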
5325 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5326 and "sseregparm" calling convention attributes;
5327 arguments as in struct attribute_spec.handler. */
5329 static tree
5330 ix86_handle_cconv_attribute (tree *node, tree name,
5331 tree args,
5332 int flags ATTRIBUTE_UNUSED,
5333 bool *no_add_attrs)
5335 if (TREE_CODE (*node) != FUNCTION_TYPE
5336 && TREE_CODE (*node) != METHOD_TYPE
5337 && TREE_CODE (*node) != FIELD_DECL
5338 && TREE_CODE (*node) != TYPE_DECL)
5340 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5341 name);
5342 *no_add_attrs = true;
5343 return NULL_TREE;
5346 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5347 if (is_attribute_p ("regparm", name))
5349 tree cst;
5351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5353 error ("fastcall and regparm attributes are not compatible");
5356 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5358 error ("regparm and thiscall attributes are not compatible");
5361 cst = TREE_VALUE (args);
5362 if (TREE_CODE (cst) != INTEGER_CST)
5364 warning (OPT_Wattributes,
5365 "%qE attribute requires an integer constant argument",
5366 name);
5367 *no_add_attrs = true;
5369 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5371 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5372 name, REGPARM_MAX);
5373 *no_add_attrs = true;
5376 return NULL_TREE;
5379 if (TARGET_64BIT)
5381 /* Do not warn when emulating the MS ABI. */
5382 if ((TREE_CODE (*node) != FUNCTION_TYPE
5383 && TREE_CODE (*node) != METHOD_TYPE)
5384 || ix86_function_type_abi (*node) != MS_ABI)
5385 warning (OPT_Wattributes, "%qE attribute ignored",
5386 name);
5387 *no_add_attrs = true;
5388 return NULL_TREE;
5391 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5392 if (is_attribute_p ("fastcall", name))
5394 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5396 error ("fastcall and cdecl attributes are not compatible");
5398 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5400 error ("fastcall and stdcall attributes are not compatible");
5402 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5404 error ("fastcall and regparm attributes are not compatible");
5406 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5408 error ("fastcall and thiscall attributes are not compatible");
5412 /* Can combine stdcall with fastcall (redundant), regparm and
5413 sseregparm. */
5414 else if (is_attribute_p ("stdcall", name))
5416 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5418 error ("stdcall and cdecl attributes are not compatible");
5420 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5422 error ("stdcall and fastcall attributes are not compatible");
5424 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5426 error ("stdcall and thiscall attributes are not compatible");
5430 /* Can combine cdecl with regparm and sseregparm. */
5431 else if (is_attribute_p ("cdecl", name))
5433 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5435 error ("stdcall and cdecl attributes are not compatible");
5437 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5439 error ("fastcall and cdecl attributes are not compatible");
5441 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5443 error ("cdecl and thiscall attributes are not compatible");
5446 else if (is_attribute_p ("thiscall", name))
5448 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5449 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5450 name);
5451 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5453 error ("stdcall and thiscall attributes are not compatible");
5455 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5457 error ("fastcall and thiscall attributes are not compatible");
5459 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5461 error ("cdecl and thiscall attributes are not compatible");
5465 /* Can combine sseregparm with all attributes. */
5467 return NULL_TREE;
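/* Illustrative example (not part of the original source): the handler above
   validates declarations such as

     int __attribute__((fastcall))   f1 (int a, int b);        // a in ECX, b in EDX
     int __attribute__((regparm(3))) f2 (int a, int b, int c); // EAX, EDX, ECX
     int __attribute__((stdcall))    f3 (int a);               // callee pops args

   and rejects incompatible combinations such as fastcall together with
   regparm.  */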
5470 /* The transactional memory builtins are implicitly regparm or fastcall
5471 depending on the ABI. Override the generic do-nothing attribute that
5472 these builtins were declared with, and replace it with one of the two
5473 attributes that we expect elsewhere. */
5475 static tree
5476 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5477 tree args ATTRIBUTE_UNUSED,
5478 int flags, bool *no_add_attrs)
5480 tree alt;
5482 /* In no case do we want to add the placeholder attribute. */
5483 *no_add_attrs = true;
5485 /* The 64-bit ABI is unchanged for transactional memory. */
5486 if (TARGET_64BIT)
5487 return NULL_TREE;
5489 /* ??? Is there a better way to validate 32-bit windows? We have
5490 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5491 if (CHECK_STACK_LIMIT > 0)
5492 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5493 else
5495 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5496 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5498 decl_attributes (node, alt, flags);
5500 return NULL_TREE;
5503 /* This function determines from TYPE the calling-convention. */
5505 unsigned int
5506 ix86_get_callcvt (const_tree type)
5508 unsigned int ret = 0;
5509 bool is_stdarg;
5510 tree attrs;
5512 if (TARGET_64BIT)
5513 return IX86_CALLCVT_CDECL;
5515 attrs = TYPE_ATTRIBUTES (type);
5516 if (attrs != NULL_TREE)
5518 if (lookup_attribute ("cdecl", attrs))
5519 ret |= IX86_CALLCVT_CDECL;
5520 else if (lookup_attribute ("stdcall", attrs))
5521 ret |= IX86_CALLCVT_STDCALL;
5522 else if (lookup_attribute ("fastcall", attrs))
5523 ret |= IX86_CALLCVT_FASTCALL;
5524 else if (lookup_attribute ("thiscall", attrs))
5525 ret |= IX86_CALLCVT_THISCALL;
5527 /* Regparm isn't allowed for thiscall and fastcall. */
5528 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5530 if (lookup_attribute ("regparm", attrs))
5531 ret |= IX86_CALLCVT_REGPARM;
5532 if (lookup_attribute ("sseregparm", attrs))
5533 ret |= IX86_CALLCVT_SSEREGPARM;
5536 if (IX86_BASE_CALLCVT(ret) != 0)
5537 return ret;
5540 is_stdarg = stdarg_p (type);
5541 if (TARGET_RTD && !is_stdarg)
5542 return IX86_CALLCVT_STDCALL | ret;
5544 if (ret != 0
5545 || is_stdarg
5546 || TREE_CODE (type) != METHOD_TYPE
5547 || ix86_function_type_abi (type) != MS_ABI)
5548 return IX86_CALLCVT_CDECL | ret;
5550 return IX86_CALLCVT_THISCALL;
5553 /* Return 0 if the attributes for two types are incompatible, 1 if they
5554 are compatible, and 2 if they are nearly compatible (which causes a
5555 warning to be generated). */
5557 static int
5558 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5560 unsigned int ccvt1, ccvt2;
5562 if (TREE_CODE (type1) != FUNCTION_TYPE
5563 && TREE_CODE (type1) != METHOD_TYPE)
5564 return 1;
5566 ccvt1 = ix86_get_callcvt (type1);
5567 ccvt2 = ix86_get_callcvt (type2);
5568 if (ccvt1 != ccvt2)
5569 return 0;
5570 if (ix86_function_regparm (type1, NULL)
5571 != ix86_function_regparm (type2, NULL))
5572 return 0;
5574 return 1;
5577 /* Return the regparm value for a function with the indicated TYPE and DECL.
5578 DECL may be NULL when calling function indirectly
5579 or considering a libcall. */
5581 static int
5582 ix86_function_regparm (const_tree type, const_tree decl)
5584 tree attr;
5585 int regparm;
5586 unsigned int ccvt;
5588 if (TARGET_64BIT)
5589 return (ix86_function_type_abi (type) == SYSV_ABI
5590 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5591 ccvt = ix86_get_callcvt (type);
5592 regparm = ix86_regparm;
5594 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5596 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5597 if (attr)
5599 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5600 return regparm;
5603 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5604 return 2;
5605 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5606 return 1;
5608 /* Use register calling convention for local functions when possible. */
5609 if (decl
5610 && TREE_CODE (decl) == FUNCTION_DECL
5611 /* Caller and callee must agree on the calling convention, so
5612 checking just the optimize flag here would mean that with
5613 __attribute__((optimize (...))) the caller could use the regparm convention
5614 and the callee not, or vice versa. Instead look at whether the callee
5615 is optimized or not. */
5616 && opt_for_fn (decl, optimize)
5617 && !(profile_flag && !flag_fentry))
5619 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5620 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5621 if (i && i->local && i->can_change_signature)
5623 int local_regparm, globals = 0, regno;
5625 /* Make sure no regparm register is taken by a
5626 fixed register variable. */
5627 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5628 if (fixed_regs[local_regparm])
5629 break;
5631 /* We don't want to use regparm(3) for nested functions as
5632 these use a static chain pointer in the third argument. */
5633 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5634 local_regparm = 2;
5636 /* In 32-bit mode save a register for the split stack. */
5637 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5638 local_regparm = 2;
5640 /* Each fixed register usage increases register pressure,
5641 so fewer registers should be used for argument passing.
5642 This functionality can be overridden by an explicit
5643 regparm value. */
5644 for (regno = AX_REG; regno <= DI_REG; regno++)
5645 if (fixed_regs[regno])
5646 globals++;
5648 local_regparm
5649 = globals < local_regparm ? local_regparm - globals : 0;
5651 if (local_regparm > regparm)
5652 regparm = local_regparm;
5656 return regparm;
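/* Illustrative example (not part of the original source): a purely local
   function in 32-bit code, e.g.

     static int add3 (int a, int b, int c) { return a + b + c; }

   whose address never escapes may have its signature changed by the logic
   above so that the arguments arrive in EAX, EDX and ECX even without an
   explicit regparm attribute, provided no regparm register is fixed.  */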
5659 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5660 DFmode (2) arguments in SSE registers for a function with the
5661 indicated TYPE and DECL. DECL may be NULL when calling function
5662 indirectly or considering a libcall. Otherwise return 0. */
5664 static int
5665 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5667 gcc_assert (!TARGET_64BIT);
5669 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5670 by the sseregparm attribute. */
5671 if (TARGET_SSEREGPARM
5672 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5674 if (!TARGET_SSE)
5676 if (warn)
5678 if (decl)
5679 error ("calling %qD with attribute sseregparm without "
5680 "SSE/SSE2 enabled", decl);
5681 else
5682 error ("calling %qT with attribute sseregparm without "
5683 "SSE/SSE2 enabled", type);
5685 return 0;
5688 return 2;
5691 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5692 (and DFmode for SSE2) arguments in SSE registers. */
5693 if (decl && TARGET_SSE_MATH && optimize
5694 && !(profile_flag && !flag_fentry))
5696 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5697 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5698 if (i && i->local && i->can_change_signature)
5699 return TARGET_SSE2 ? 2 : 1;
5702 return 0;
5705 /* Return true if EAX is live at the start of the function. Used by
5706 ix86_expand_prologue to determine if we need special help before
5707 calling allocate_stack_worker. */
5709 static bool
5710 ix86_eax_live_at_start_p (void)
5712 /* Cheat. Don't bother working forward from ix86_function_regparm
5713 to the function type to whether an actual argument is located in
5714 eax. Instead just look at cfg info, which is still close enough
5715 to correct at this point. This gives false positives for broken
5716 functions that might use uninitialized data that happens to be
5717 allocated in eax, but who cares? */
5718 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5721 static bool
5722 ix86_keep_aggregate_return_pointer (tree fntype)
5724 tree attr;
5726 if (!TARGET_64BIT)
5728 attr = lookup_attribute ("callee_pop_aggregate_return",
5729 TYPE_ATTRIBUTES (fntype));
5730 if (attr)
5731 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5733 /* For 32-bit MS-ABI the default is to keep aggregate
5734 return pointer. */
5735 if (ix86_function_type_abi (fntype) == MS_ABI)
5736 return true;
5738 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5741 /* Value is the number of bytes of arguments automatically
5742 popped when returning from a subroutine call.
5743 FUNDECL is the declaration node of the function (as a tree),
5744 FUNTYPE is the data type of the function (as a tree),
5745 or for a library call it is an identifier node for the subroutine name.
5746 SIZE is the number of bytes of arguments passed on the stack.
5748 On the 80386, the RTD insn may be used to pop them if the number
5749 of args is fixed, but if the number is variable then the caller
5750 must pop them all. RTD can't be used for library calls now
5751 because the library is compiled with the Unix compiler.
5752 Use of RTD is a selectable option, since it is incompatible with
5753 standard Unix calling sequences. If the option is not selected,
5754 the caller must always pop the args.
5756 The attribute stdcall is equivalent to RTD on a per module basis. */
5758 static int
5759 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5761 unsigned int ccvt;
5763 /* None of the 64-bit ABIs pop arguments. */
5764 if (TARGET_64BIT)
5765 return 0;
5767 ccvt = ix86_get_callcvt (funtype);
5769 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5770 | IX86_CALLCVT_THISCALL)) != 0
5771 && ! stdarg_p (funtype))
5772 return size;
5774 /* Lose any fake structure return argument if it is passed on the stack. */
5775 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5776 && !ix86_keep_aggregate_return_pointer (funtype))
5778 int nregs = ix86_function_regparm (funtype, fundecl);
5779 if (nregs == 0)
5780 return GET_MODE_SIZE (Pmode);
5783 return 0;
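/* Illustrative example (not part of the original source): for

     int __attribute__((stdcall)) f (int a, int b);

   SIZE is 8, so the callee returns with "ret $8", while a cdecl callee uses a
   plain "ret" and leaves popping the arguments to the caller.  */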
5786 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5788 static bool
5789 ix86_legitimate_combined_insn (rtx insn)
5791 /* Check operand constraints in case hard registers were propagated
5792 into insn pattern. This check prevents combine pass from
5793 generating insn patterns with invalid hard register operands.
5794 These invalid insns can eventually confuse reload to error out
5795 with a spill failure. See also PRs 46829 and 46843. */
5796 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5798 int i;
5800 extract_insn (insn);
5801 preprocess_constraints ();
5803 for (i = 0; i < recog_data.n_operands; i++)
5805 rtx op = recog_data.operand[i];
5806 enum machine_mode mode = GET_MODE (op);
5807 struct operand_alternative *op_alt;
5808 int offset = 0;
5809 bool win;
5810 int j;
5812 /* For pre-AVX disallow unaligned loads/stores where the
5813 instructions don't support it. */
5814 if (!TARGET_AVX
5815 && VECTOR_MODE_P (GET_MODE (op))
5816 && misaligned_operand (op, GET_MODE (op)))
5818 int min_align = get_attr_ssememalign (insn);
5819 if (min_align == 0)
5820 return false;
5823 /* A unary operator may be accepted by the predicate, but it
5824 is irrelevant for matching constraints. */
5825 if (UNARY_P (op))
5826 op = XEXP (op, 0);
5828 if (GET_CODE (op) == SUBREG)
5830 if (REG_P (SUBREG_REG (op))
5831 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5832 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5833 GET_MODE (SUBREG_REG (op)),
5834 SUBREG_BYTE (op),
5835 GET_MODE (op));
5836 op = SUBREG_REG (op);
5839 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5840 continue;
5842 op_alt = recog_op_alt[i];
5844 /* Operand has no constraints, anything is OK. */
5845 win = !recog_data.n_alternatives;
5847 for (j = 0; j < recog_data.n_alternatives; j++)
5849 if (op_alt[j].anything_ok
5850 || (op_alt[j].matches != -1
5851 && operands_match_p
5852 (recog_data.operand[i],
5853 recog_data.operand[op_alt[j].matches]))
5854 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5856 win = true;
5857 break;
5861 if (!win)
5862 return false;
5866 return true;
5869 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5871 static unsigned HOST_WIDE_INT
5872 ix86_asan_shadow_offset (void)
5874 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5875 : HOST_WIDE_INT_C (0x7fff8000))
5876 : (HOST_WIDE_INT_1 << 29);
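/* Illustrative note (not part of the original source): AddressSanitizer maps
   an application address to its shadow byte roughly as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so on x86-64 Linux (LP64, non-Mach-O) the offset above is 0x7fff8000.  */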
5879 /* Argument support functions. */
5881 /* Return true when register may be used to pass function parameters. */
5882 bool
5883 ix86_function_arg_regno_p (int regno)
5885 int i;
5886 const int *parm_regs;
5888 if (!TARGET_64BIT)
5890 if (TARGET_MACHO)
5891 return (regno < REGPARM_MAX
5892 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5893 else
5894 return (regno < REGPARM_MAX
5895 || (TARGET_MMX && MMX_REGNO_P (regno)
5896 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5897 || (TARGET_SSE && SSE_REGNO_P (regno)
5898 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5901 if (TARGET_SSE && SSE_REGNO_P (regno)
5902 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5903 return true;
5905 /* TODO: The function should depend on current function ABI but
5906 builtins.c would need updating then. Therefore we use the
5907 default ABI. */
5909 /* RAX is used as hidden argument to va_arg functions. */
5910 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5911 return true;
5913 if (ix86_abi == MS_ABI)
5914 parm_regs = x86_64_ms_abi_int_parameter_registers;
5915 else
5916 parm_regs = x86_64_int_parameter_registers;
5917 for (i = 0; i < (ix86_abi == MS_ABI
5918 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5919 if (regno == parm_regs[i])
5920 return true;
5921 return false;
5924 /* Return if we do not know how to pass TYPE solely in registers. */
5926 static bool
5927 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5929 if (must_pass_in_stack_var_size_or_pad (mode, type))
5930 return true;
5932 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5933 The layout_type routine is crafty and tries to trick us into passing
5934 currently unsupported vector types on the stack by using TImode. */
5935 return (!TARGET_64BIT && mode == TImode
5936 && type && TREE_CODE (type) != VECTOR_TYPE);
5939 /* Return the size, in bytes, of the area reserved for arguments passed
5940 in registers for the function represented by FNDECL, depending on the
5941 ABI format in use. */
5943 ix86_reg_parm_stack_space (const_tree fndecl)
5945 enum calling_abi call_abi = SYSV_ABI;
5946 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5947 call_abi = ix86_function_abi (fndecl);
5948 else
5949 call_abi = ix86_function_type_abi (fndecl);
5950 if (TARGET_64BIT && call_abi == MS_ABI)
5951 return 32;
5952 return 0;
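/* Illustrative note (not part of the original source): for a 64-bit MS-ABI
   call such as f (1, 2) the caller still reserves 32 bytes of "shadow" stack
   space for the four register argument slots; that is the area whose size is
   returned above.  */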
5955 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5956 call abi used. */
5957 enum calling_abi
5958 ix86_function_type_abi (const_tree fntype)
5960 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5962 enum calling_abi abi = ix86_abi;
5963 if (abi == SYSV_ABI)
5965 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5966 abi = MS_ABI;
5968 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5969 abi = SYSV_ABI;
5970 return abi;
5972 return ix86_abi;
5975 /* We add this as a workaround in order to use libc_has_function
5976 hook in i386.md. */
5977 bool
5978 ix86_libc_has_function (enum function_class fn_class)
5980 return targetm.libc_has_function (fn_class);
5983 static bool
5984 ix86_function_ms_hook_prologue (const_tree fn)
5986 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5988 if (decl_function_context (fn) != NULL_TREE)
5989 error_at (DECL_SOURCE_LOCATION (fn),
5990 "ms_hook_prologue is not compatible with nested function");
5991 else
5992 return true;
5994 return false;
5997 static enum calling_abi
5998 ix86_function_abi (const_tree fndecl)
6000 if (! fndecl)
6001 return ix86_abi;
6002 return ix86_function_type_abi (TREE_TYPE (fndecl));
6005 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6006 call abi used. */
6007 enum calling_abi
6008 ix86_cfun_abi (void)
6010 if (! cfun)
6011 return ix86_abi;
6012 return cfun->machine->call_abi;
6015 /* Write the extra assembler code needed to declare a function properly. */
6017 void
6018 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6019 tree decl)
6021 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6023 if (is_ms_hook)
6025 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6026 unsigned int filler_cc = 0xcccccccc;
6028 for (i = 0; i < filler_count; i += 4)
6029 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6032 #ifdef SUBTARGET_ASM_UNWIND_INIT
6033 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6034 #endif
6036 ASM_OUTPUT_LABEL (asm_out_file, fname);
6038 /* Output magic byte marker, if hot-patch attribute is set. */
6039 if (is_ms_hook)
6041 if (TARGET_64BIT)
6043 /* leaq [%rsp + 0], %rsp */
6044 asm_fprintf (asm_out_file, ASM_BYTE
6045 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6047 else
6049 /* movl.s %edi, %edi
6050 push %ebp
6051 movl.s %esp, %ebp */
6052 asm_fprintf (asm_out_file, ASM_BYTE
6053 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6058 /* regclass.c */
6059 extern void init_regs (void);
6061 /* Implementation of call abi switching target hook. Specific to FNDECL
6062 the specific call register sets are set. See also
6063 ix86_conditional_register_usage for more details. */
6064 void
6065 ix86_call_abi_override (const_tree fndecl)
6067 if (fndecl == NULL_TREE)
6068 cfun->machine->call_abi = ix86_abi;
6069 else
6070 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6073 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6074 expensive re-initialization of init_regs each time we switch function context
6075 since this is needed only during RTL expansion. */
6076 static void
6077 ix86_maybe_switch_abi (void)
6079 if (TARGET_64BIT &&
6080 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6081 reinit_regs ();
6084 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6085 for a call to a function whose data type is FNTYPE.
6086 For a library call, FNTYPE is 0. */
6088 void
6089 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6090 tree fntype, /* tree ptr for function decl */
6091 rtx libname, /* SYMBOL_REF of library name or 0 */
6092 tree fndecl,
6093 int caller)
6095 struct cgraph_local_info *i;
6097 memset (cum, 0, sizeof (*cum));
6099 if (fndecl)
6101 i = cgraph_local_info (fndecl);
6102 cum->call_abi = ix86_function_abi (fndecl);
6104 else
6106 i = NULL;
6107 cum->call_abi = ix86_function_type_abi (fntype);
6110 cum->caller = caller;
6112 /* Set up the number of registers to use for passing arguments. */
6113 cum->nregs = ix86_regparm;
6114 if (TARGET_64BIT)
6116 cum->nregs = (cum->call_abi == SYSV_ABI
6117 ? X86_64_REGPARM_MAX
6118 : X86_64_MS_REGPARM_MAX);
6120 if (TARGET_SSE)
6122 cum->sse_nregs = SSE_REGPARM_MAX;
6123 if (TARGET_64BIT)
6125 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6126 ? X86_64_SSE_REGPARM_MAX
6127 : X86_64_MS_SSE_REGPARM_MAX);
6130 if (TARGET_MMX)
6131 cum->mmx_nregs = MMX_REGPARM_MAX;
6132 cum->warn_avx512f = true;
6133 cum->warn_avx = true;
6134 cum->warn_sse = true;
6135 cum->warn_mmx = true;
6137 /* Because the type might mismatch between caller and callee, we need to
6138 use the actual function type for local calls.
6139 FIXME: cgraph_analyze could be told to record whether a function uses
6140 va_start, so that for local functions maybe_vaarg could be made more
6141 aggressive, helping K&R code.
6142 FIXME: once the type system is fixed, we won't need this code anymore. */
6143 if (i && i->local && i->can_change_signature)
6144 fntype = TREE_TYPE (fndecl);
6145 cum->maybe_vaarg = (fntype
6146 ? (!prototype_p (fntype) || stdarg_p (fntype))
6147 : !libname);
6149 if (!TARGET_64BIT)
6151 /* If there are variable arguments, then we won't pass anything
6152 in registers in 32-bit mode. */
6153 if (stdarg_p (fntype))
6155 cum->nregs = 0;
6156 cum->sse_nregs = 0;
6157 cum->mmx_nregs = 0;
6158 cum->warn_avx512f = false;
6159 cum->warn_avx = false;
6160 cum->warn_sse = false;
6161 cum->warn_mmx = false;
6162 return;
6165 /* Use ecx and edx registers if function has fastcall attribute,
6166 else look for regparm information. */
6167 if (fntype)
6169 unsigned int ccvt = ix86_get_callcvt (fntype);
6170 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6172 cum->nregs = 1;
6173 cum->fastcall = 1; /* Same first register as in fastcall. */
6175 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6177 cum->nregs = 2;
6178 cum->fastcall = 1;
6180 else
6181 cum->nregs = ix86_function_regparm (fntype, fndecl);
6184 /* Set up the number of SSE registers used for passing SFmode
6185 and DFmode arguments. Warn for mismatching ABI. */
6186 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6190 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6191 But in the case of vector types, it is some vector mode.
6193 When we have only some of our vector isa extensions enabled, then there
6194 are some modes for which vector_mode_supported_p is false. For these
6195 modes, the generic vector support in gcc will choose some non-vector mode
6196 in order to implement the type. By computing the natural mode, we'll
6197 select the proper ABI location for the operand and not depend on whatever
6198 the middle-end decides to do with these vector types.
6200 The middle-end can't deal with vector types larger than 16 bytes. In this
6201 case, we return the original mode and warn about the ABI change if CUM isn't
6202 NULL.
6204 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6205 available for the function return value. */
6207 static enum machine_mode
6208 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6209 bool in_return)
6211 enum machine_mode mode = TYPE_MODE (type);
6213 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6215 HOST_WIDE_INT size = int_size_in_bytes (type);
6216 if ((size == 8 || size == 16 || size == 32 || size == 64)
6217 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6218 && TYPE_VECTOR_SUBPARTS (type) > 1)
6220 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6222 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6223 mode = MIN_MODE_VECTOR_FLOAT;
6224 else
6225 mode = MIN_MODE_VECTOR_INT;
6227 /* Get the mode which has this inner mode and number of units. */
6228 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6229 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6230 && GET_MODE_INNER (mode) == innermode)
6232 if (size == 64 && !TARGET_AVX512F)
6234 static bool warnedavx512f;
6235 static bool warnedavx512f_ret;
6237 if (cum && cum->warn_avx512f && !warnedavx512f)
6239 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6240 "without AVX512F enabled changes the ABI"))
6241 warnedavx512f = true;
6243 else if (in_return && !warnedavx512f_ret)
6245 if (warning (OPT_Wpsabi, "AVX512F vector return "
6246 "without AVX512F enabled changes the ABI"))
6247 warnedavx512f_ret = true;
6250 return TYPE_MODE (type);
6252 else if (size == 32 && !TARGET_AVX)
6254 static bool warnedavx;
6255 static bool warnedavx_ret;
6257 if (cum && cum->warn_avx && !warnedavx)
6259 if (warning (OPT_Wpsabi, "AVX vector argument "
6260 "without AVX enabled changes the ABI"))
6261 warnedavx = true;
6263 else if (in_return && !warnedavx_ret)
6265 if (warning (OPT_Wpsabi, "AVX vector return "
6266 "without AVX enabled changes the ABI"))
6267 warnedavx_ret = true;
6270 return TYPE_MODE (type);
6272 else if (((size == 8 && TARGET_64BIT) || size == 16)
6273 && !TARGET_SSE)
6275 static bool warnedsse;
6276 static bool warnedsse_ret;
6278 if (cum && cum->warn_sse && !warnedsse)
6280 if (warning (OPT_Wpsabi, "SSE vector argument "
6281 "without SSE enabled changes the ABI"))
6282 warnedsse = true;
6284 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6286 if (warning (OPT_Wpsabi, "SSE vector return "
6287 "without SSE enabled changes the ABI"))
6288 warnedsse_ret = true;
6291 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6293 static bool warnedmmx;
6294 static bool warnedmmx_ret;
6296 if (cum && cum->warn_mmx && !warnedmmx)
6298 if (warning (OPT_Wpsabi, "MMX vector argument "
6299 "without MMX enabled changes the ABI"))
6300 warnedmmx = true;
6302 else if (in_return && !warnedmmx_ret)
6304 if (warning (OPT_Wpsabi, "MMX vector return "
6305 "without MMX enabled changes the ABI"))
6306 warnedmmx_ret = true;
6309 return mode;
6312 gcc_unreachable ();
6316 return mode;
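/* Illustrative example (not part of the original source): with only SSE
   enabled, a type such as

     typedef float v8sf __attribute__((vector_size (32)));

   has no supported 256-bit vector mode, so the code above issues the -Wpsabi
   "changes the ABI" warning and falls back to TYPE_MODE.  */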
6319 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6320 this may not agree with the mode that the type system has chosen for the
6321 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6322 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6324 static rtx
6325 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6326 unsigned int regno)
6328 rtx tmp;
6330 if (orig_mode != BLKmode)
6331 tmp = gen_rtx_REG (orig_mode, regno);
6332 else
6334 tmp = gen_rtx_REG (mode, regno);
6335 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6336 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6339 return tmp;
6342 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6343 of this code is to classify each 8bytes of incoming argument by the register
6344 class and assign registers accordingly. */
6346 /* Return the union class of CLASS1 and CLASS2.
6347 See the x86-64 PS ABI for details. */
6349 static enum x86_64_reg_class
6350 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6352 /* Rule #1: If both classes are equal, this is the resulting class. */
6353 if (class1 == class2)
6354 return class1;
6356 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6357 the other class. */
6358 if (class1 == X86_64_NO_CLASS)
6359 return class2;
6360 if (class2 == X86_64_NO_CLASS)
6361 return class1;
6363 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6364 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6365 return X86_64_MEMORY_CLASS;
6367 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6368 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6369 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6370 return X86_64_INTEGERSI_CLASS;
6371 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6372 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6373 return X86_64_INTEGER_CLASS;
6375 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6376 MEMORY is used. */
6377 if (class1 == X86_64_X87_CLASS
6378 || class1 == X86_64_X87UP_CLASS
6379 || class1 == X86_64_COMPLEX_X87_CLASS
6380 || class2 == X86_64_X87_CLASS
6381 || class2 == X86_64_X87UP_CLASS
6382 || class2 == X86_64_COMPLEX_X87_CLASS)
6383 return X86_64_MEMORY_CLASS;
6385 /* Rule #6: Otherwise class SSE is used. */
6386 return X86_64_SSE_CLASS;
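/* Illustrative example (not part of the original source): for

     union u { double d; int i; };

   the single eightbyte classifies as SSE from the double and INTEGER from the
   int; merging them yields INTEGER by rule #4 above, so the union is passed
   in a general-purpose register rather than an XMM register.  */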
6389 /* Classify the argument of type TYPE and mode MODE.
6390 CLASSES will be filled by the register class used to pass each word
6391 of the operand. The number of words is returned. In case the parameter
6392 should be passed in memory, 0 is returned. As a special case for zero
6393 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6395 BIT_OFFSET is used internally for handling records and specifies the
6396 offset in bits modulo 512 to avoid overflow cases.
6398 See the x86-64 PS ABI for details.
6401 static int
6402 classify_argument (enum machine_mode mode, const_tree type,
6403 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6405 HOST_WIDE_INT bytes =
6406 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6407 int words
6408 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6410 /* Variable sized entities are always passed/returned in memory. */
6411 if (bytes < 0)
6412 return 0;
6414 if (mode != VOIDmode
6415 && targetm.calls.must_pass_in_stack (mode, type))
6416 return 0;
6418 if (type && AGGREGATE_TYPE_P (type))
6420 int i;
6421 tree field;
6422 enum x86_64_reg_class subclasses[MAX_CLASSES];
6424 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6425 if (bytes > 32)
6426 return 0;
6428 for (i = 0; i < words; i++)
6429 classes[i] = X86_64_NO_CLASS;
6431 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6432 signal the memory class, so handle it as a special case. */
6433 if (!words)
6435 classes[0] = X86_64_NO_CLASS;
6436 return 1;
6439 /* Classify each field of record and merge classes. */
6440 switch (TREE_CODE (type))
6442 case RECORD_TYPE:
6443 /* And now merge the fields of structure. */
6444 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6446 if (TREE_CODE (field) == FIELD_DECL)
6448 int num;
6450 if (TREE_TYPE (field) == error_mark_node)
6451 continue;
6453 /* Bitfields are always classified as integer. Handle them
6454 early, since later code would consider them to be
6455 misaligned integers. */
6456 if (DECL_BIT_FIELD (field))
6458 for (i = (int_bit_position (field)
6459 + (bit_offset % 64)) / 8 / 8;
6460 i < ((int_bit_position (field) + (bit_offset % 64))
6461 + tree_to_shwi (DECL_SIZE (field))
6462 + 63) / 8 / 8; i++)
6463 classes[i] =
6464 merge_classes (X86_64_INTEGER_CLASS,
6465 classes[i]);
6467 else
6469 int pos;
6471 type = TREE_TYPE (field);
6473 /* Flexible array member is ignored. */
6474 if (TYPE_MODE (type) == BLKmode
6475 && TREE_CODE (type) == ARRAY_TYPE
6476 && TYPE_SIZE (type) == NULL_TREE
6477 && TYPE_DOMAIN (type) != NULL_TREE
6478 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6479 == NULL_TREE))
6481 static bool warned;
6483 if (!warned && warn_psabi)
6485 warned = true;
6486 inform (input_location,
6487 "the ABI of passing struct with"
6488 " a flexible array member has"
6489 " changed in GCC 4.4");
6491 continue;
6493 num = classify_argument (TYPE_MODE (type), type,
6494 subclasses,
6495 (int_bit_position (field)
6496 + bit_offset) % 512);
6497 if (!num)
6498 return 0;
6499 pos = (int_bit_position (field)
6500 + (bit_offset % 64)) / 8 / 8;
6501 for (i = 0; i < num && (i + pos) < words; i++)
6502 classes[i + pos] =
6503 merge_classes (subclasses[i], classes[i + pos]);
6507 break;
6509 case ARRAY_TYPE:
6510 /* Arrays are handled as small records. */
6512 int num;
6513 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6514 TREE_TYPE (type), subclasses, bit_offset);
6515 if (!num)
6516 return 0;
6518 /* The partial classes are now full classes. */
6519 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6520 subclasses[0] = X86_64_SSE_CLASS;
6521 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6522 && !((bit_offset % 64) == 0 && bytes == 4))
6523 subclasses[0] = X86_64_INTEGER_CLASS;
6525 for (i = 0; i < words; i++)
6526 classes[i] = subclasses[i % num];
6528 break;
6530 case UNION_TYPE:
6531 case QUAL_UNION_TYPE:
6532 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
6534 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6536 if (TREE_CODE (field) == FIELD_DECL)
6538 int num;
6540 if (TREE_TYPE (field) == error_mark_node)
6541 continue;
6543 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6544 TREE_TYPE (field), subclasses,
6545 bit_offset);
6546 if (!num)
6547 return 0;
6548 for (i = 0; i < num; i++)
6549 classes[i] = merge_classes (subclasses[i], classes[i]);
6552 break;
6554 default:
6555 gcc_unreachable ();
6558 if (words > 2)
6560 /* When size > 16 bytes, if the first eightbyte isn't
6561 X86_64_SSE_CLASS or any of the later ones isn't
6562 X86_64_SSEUP_CLASS, everything should be passed in
6563 memory. */
6564 if (classes[0] != X86_64_SSE_CLASS)
6565 return 0;
6567 for (i = 1; i < words; i++)
6568 if (classes[i] != X86_64_SSEUP_CLASS)
6569 return 0;
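/* For example, a 32 byte struct whose only member is an __m256 vector is
   classified { SSE, SSEUP, SSEUP, SSEUP } and survives the check above,
   while a 32 byte struct of four longs starts with INTEGER and is passed
   in memory instead.  */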
6572 /* Final merger cleanup. */
6573 for (i = 0; i < words; i++)
6575 /* If one class is MEMORY, everything should be passed in
6576 memory. */
6577 if (classes[i] == X86_64_MEMORY_CLASS)
6578 return 0;
6580 /* The X86_64_SSEUP_CLASS should always be preceded by
6581 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6582 if (classes[i] == X86_64_SSEUP_CLASS
6583 && classes[i - 1] != X86_64_SSE_CLASS
6584 && classes[i - 1] != X86_64_SSEUP_CLASS)
6586 /* The first one should never be X86_64_SSEUP_CLASS. */
6587 gcc_assert (i != 0);
6588 classes[i] = X86_64_SSE_CLASS;
6591 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6592 everything should be passed in memory. */
6593 if (classes[i] == X86_64_X87UP_CLASS
6594 && (classes[i - 1] != X86_64_X87_CLASS))
6596 static bool warned;
6598 /* The first one should never be X86_64_X87UP_CLASS. */
6599 gcc_assert (i != 0);
6600 if (!warned && warn_psabi)
6602 warned = true;
6603 inform (input_location,
6604 "the ABI of passing union with long double"
6605 " has changed in GCC 4.4");
6607 return 0;
6610 return words;
6613 /* Compute the alignment needed.  We align all types to their natural boundaries,
6614 with the exception of XFmode, which is aligned to 64 bits. */
6615 if (mode != VOIDmode && mode != BLKmode)
6617 int mode_alignment = GET_MODE_BITSIZE (mode);
6619 if (mode == XFmode)
6620 mode_alignment = 128;
6621 else if (mode == XCmode)
6622 mode_alignment = 256;
6623 if (COMPLEX_MODE_P (mode))
6624 mode_alignment /= 2;
6625 /* Misaligned fields are always returned in memory. */
6626 if (bit_offset % mode_alignment)
6627 return 0;
6630 /* For V1xx modes, just use the base mode. */
6631 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6632 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6633 mode = GET_MODE_INNER (mode);
6635 /* Classification of atomic types. */
6636 switch (mode)
6638 case SDmode:
6639 case DDmode:
6640 classes[0] = X86_64_SSE_CLASS;
6641 return 1;
6642 case TDmode:
6643 classes[0] = X86_64_SSE_CLASS;
6644 classes[1] = X86_64_SSEUP_CLASS;
6645 return 2;
6646 case DImode:
6647 case SImode:
6648 case HImode:
6649 case QImode:
6650 case CSImode:
6651 case CHImode:
6652 case CQImode:
6654 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6656 /* Analyze last 128 bits only. */
6657 size = (size - 1) & 0x7f;
6659 if (size < 32)
6661 classes[0] = X86_64_INTEGERSI_CLASS;
6662 return 1;
6664 else if (size < 64)
6666 classes[0] = X86_64_INTEGER_CLASS;
6667 return 1;
6669 else if (size < 64+32)
6671 classes[0] = X86_64_INTEGER_CLASS;
6672 classes[1] = X86_64_INTEGERSI_CLASS;
6673 return 2;
6675 else if (size < 64+64)
6677 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6678 return 2;
6680 else
6681 gcc_unreachable ();
6683 case CDImode:
6684 case TImode:
6685 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6686 return 2;
6687 case COImode:
6688 case OImode:
6689 /* OImode shouldn't be used directly. */
6690 gcc_unreachable ();
6691 case CTImode:
6692 return 0;
6693 case SFmode:
6694 if (!(bit_offset % 64))
6695 classes[0] = X86_64_SSESF_CLASS;
6696 else
6697 classes[0] = X86_64_SSE_CLASS;
6698 return 1;
6699 case DFmode:
6700 classes[0] = X86_64_SSEDF_CLASS;
6701 return 1;
6702 case XFmode:
6703 classes[0] = X86_64_X87_CLASS;
6704 classes[1] = X86_64_X87UP_CLASS;
6705 return 2;
6706 case TFmode:
6707 classes[0] = X86_64_SSE_CLASS;
6708 classes[1] = X86_64_SSEUP_CLASS;
6709 return 2;
6710 case SCmode:
6711 classes[0] = X86_64_SSE_CLASS;
6712 if (!(bit_offset % 64))
6713 return 1;
6714 else
6716 static bool warned;
6718 if (!warned && warn_psabi)
6720 warned = true;
6721 inform (input_location,
6722 "the ABI of passing structure with complex float"
6723 " member has changed in GCC 4.4");
6725 classes[1] = X86_64_SSESF_CLASS;
6726 return 2;
6728 case DCmode:
6729 classes[0] = X86_64_SSEDF_CLASS;
6730 classes[1] = X86_64_SSEDF_CLASS;
6731 return 2;
6732 case XCmode:
6733 classes[0] = X86_64_COMPLEX_X87_CLASS;
6734 return 1;
6735 case TCmode:
6736 /* This mode is larger than 16 bytes. */
6737 return 0;
6738 case V8SFmode:
6739 case V8SImode:
6740 case V32QImode:
6741 case V16HImode:
6742 case V4DFmode:
6743 case V4DImode:
6744 classes[0] = X86_64_SSE_CLASS;
6745 classes[1] = X86_64_SSEUP_CLASS;
6746 classes[2] = X86_64_SSEUP_CLASS;
6747 classes[3] = X86_64_SSEUP_CLASS;
6748 return 4;
6749 case V8DFmode:
6750 case V16SFmode:
6751 case V8DImode:
6752 case V16SImode:
6753 case V32HImode:
6754 case V64QImode:
6755 classes[0] = X86_64_SSE_CLASS;
6756 classes[1] = X86_64_SSEUP_CLASS;
6757 classes[2] = X86_64_SSEUP_CLASS;
6758 classes[3] = X86_64_SSEUP_CLASS;
6759 classes[4] = X86_64_SSEUP_CLASS;
6760 classes[5] = X86_64_SSEUP_CLASS;
6761 classes[6] = X86_64_SSEUP_CLASS;
6762 classes[7] = X86_64_SSEUP_CLASS;
6763 return 8;
6764 case V4SFmode:
6765 case V4SImode:
6766 case V16QImode:
6767 case V8HImode:
6768 case V2DFmode:
6769 case V2DImode:
6770 classes[0] = X86_64_SSE_CLASS;
6771 classes[1] = X86_64_SSEUP_CLASS;
6772 return 2;
6773 case V1TImode:
6774 case V1DImode:
6775 case V2SFmode:
6776 case V2SImode:
6777 case V4HImode:
6778 case V8QImode:
6779 classes[0] = X86_64_SSE_CLASS;
6780 return 1;
6781 case BLKmode:
6782 case VOIDmode:
6783 return 0;
6784 default:
6785 gcc_assert (VECTOR_MODE_P (mode));
6787 if (bytes > 16)
6788 return 0;
6790 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6792 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6793 classes[0] = X86_64_INTEGERSI_CLASS;
6794 else
6795 classes[0] = X86_64_INTEGER_CLASS;
6796 classes[1] = X86_64_INTEGER_CLASS;
6797 return 1 + (bytes > 8);
6801 /* Examine the argument and set the number of registers required in each
6802 class.  Return 0 iff the parameter should be passed in memory. */
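/* For instance, for struct { double d; long l; } this sets *int_nregs = 1
   and *sse_nregs = 1, while an XFmode long double (X87 classes) makes it
   return 0 unless IN_RETURN is set.  */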
6803 static int
6804 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6805 int *int_nregs, int *sse_nregs)
6807 enum x86_64_reg_class regclass[MAX_CLASSES];
6808 int n = classify_argument (mode, type, regclass, 0);
6810 *int_nregs = 0;
6811 *sse_nregs = 0;
6812 if (!n)
6813 return 0;
6814 for (n--; n >= 0; n--)
6815 switch (regclass[n])
6817 case X86_64_INTEGER_CLASS:
6818 case X86_64_INTEGERSI_CLASS:
6819 (*int_nregs)++;
6820 break;
6821 case X86_64_SSE_CLASS:
6822 case X86_64_SSESF_CLASS:
6823 case X86_64_SSEDF_CLASS:
6824 (*sse_nregs)++;
6825 break;
6826 case X86_64_NO_CLASS:
6827 case X86_64_SSEUP_CLASS:
6828 break;
6829 case X86_64_X87_CLASS:
6830 case X86_64_X87UP_CLASS:
6831 if (!in_return)
6832 return 0;
6833 break;
6834 case X86_64_COMPLEX_X87_CLASS:
6835 return in_return ? 2 : 0;
6836 case X86_64_MEMORY_CLASS:
6837 gcc_unreachable ();
6839 return 1;
6842 /* Construct container for the argument used by GCC interface. See
6843 FUNCTION_ARG for the detailed description. */
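/* Sketch of a typical result: for struct { double d; long l; } passed as
   the first argument, the returned rtx is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte lives in %xmm0 and the second in %rdi.  */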
6845 static rtx
6846 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6847 const_tree type, int in_return, int nintregs, int nsseregs,
6848 const int *intreg, int sse_regno)
6850 /* The following variables hold the static issued_error state. */
6851 static bool issued_sse_arg_error;
6852 static bool issued_sse_ret_error;
6853 static bool issued_x87_ret_error;
6855 enum machine_mode tmpmode;
6856 int bytes =
6857 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6858 enum x86_64_reg_class regclass[MAX_CLASSES];
6859 int n;
6860 int i;
6861 int nexps = 0;
6862 int needed_sseregs, needed_intregs;
6863 rtx exp[MAX_CLASSES];
6864 rtx ret;
6866 n = classify_argument (mode, type, regclass, 0);
6867 if (!n)
6868 return NULL;
6869 if (!examine_argument (mode, type, in_return, &needed_intregs,
6870 &needed_sseregs))
6871 return NULL;
6872 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6873 return NULL;
6875 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6876 some less clueful developer tries to use floating-point anyway. */
6877 if (needed_sseregs && !TARGET_SSE)
6879 if (in_return)
6881 if (!issued_sse_ret_error)
6883 error ("SSE register return with SSE disabled");
6884 issued_sse_ret_error = true;
6887 else if (!issued_sse_arg_error)
6889 error ("SSE register argument with SSE disabled");
6890 issued_sse_arg_error = true;
6892 return NULL;
6895 /* Likewise, error if the ABI requires us to return values in the
6896 x87 registers and the user specified -mno-80387. */
6897 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6898 for (i = 0; i < n; i++)
6899 if (regclass[i] == X86_64_X87_CLASS
6900 || regclass[i] == X86_64_X87UP_CLASS
6901 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6903 if (!issued_x87_ret_error)
6905 error ("x87 register return with x87 disabled");
6906 issued_x87_ret_error = true;
6908 return NULL;
6911 /* First construct the simple cases.  Avoid SCmode, since we want to use a
6912 single register to pass this type. */
6913 if (n == 1 && mode != SCmode)
6914 switch (regclass[0])
6916 case X86_64_INTEGER_CLASS:
6917 case X86_64_INTEGERSI_CLASS:
6918 return gen_rtx_REG (mode, intreg[0]);
6919 case X86_64_SSE_CLASS:
6920 case X86_64_SSESF_CLASS:
6921 case X86_64_SSEDF_CLASS:
6922 if (mode != BLKmode)
6923 return gen_reg_or_parallel (mode, orig_mode,
6924 SSE_REGNO (sse_regno));
6925 break;
6926 case X86_64_X87_CLASS:
6927 case X86_64_COMPLEX_X87_CLASS:
6928 return gen_rtx_REG (mode, FIRST_STACK_REG);
6929 case X86_64_NO_CLASS:
6930 /* Zero sized array, struct or class. */
6931 return NULL;
6932 default:
6933 gcc_unreachable ();
6935 if (n == 2
6936 && regclass[0] == X86_64_SSE_CLASS
6937 && regclass[1] == X86_64_SSEUP_CLASS
6938 && mode != BLKmode)
6939 return gen_reg_or_parallel (mode, orig_mode,
6940 SSE_REGNO (sse_regno));
6941 if (n == 4
6942 && regclass[0] == X86_64_SSE_CLASS
6943 && regclass[1] == X86_64_SSEUP_CLASS
6944 && regclass[2] == X86_64_SSEUP_CLASS
6945 && regclass[3] == X86_64_SSEUP_CLASS
6946 && mode != BLKmode)
6947 return gen_reg_or_parallel (mode, orig_mode,
6948 SSE_REGNO (sse_regno));
6949 if (n == 8
6950 && regclass[0] == X86_64_SSE_CLASS
6951 && regclass[1] == X86_64_SSEUP_CLASS
6952 && regclass[2] == X86_64_SSEUP_CLASS
6953 && regclass[3] == X86_64_SSEUP_CLASS
6954 && regclass[4] == X86_64_SSEUP_CLASS
6955 && regclass[5] == X86_64_SSEUP_CLASS
6956 && regclass[6] == X86_64_SSEUP_CLASS
6957 && regclass[7] == X86_64_SSEUP_CLASS
6958 && mode != BLKmode)
6959 return gen_reg_or_parallel (mode, orig_mode,
6960 SSE_REGNO (sse_regno));
6961 if (n == 2
6962 && regclass[0] == X86_64_X87_CLASS
6963 && regclass[1] == X86_64_X87UP_CLASS)
6964 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6966 if (n == 2
6967 && regclass[0] == X86_64_INTEGER_CLASS
6968 && regclass[1] == X86_64_INTEGER_CLASS
6969 && (mode == CDImode || mode == TImode || mode == TFmode)
6970 && intreg[0] + 1 == intreg[1])
6971 return gen_rtx_REG (mode, intreg[0]);
6973 /* Otherwise figure out the entries of the PARALLEL. */
6974 for (i = 0; i < n; i++)
6976 int pos;
6978 switch (regclass[i])
6980 case X86_64_NO_CLASS:
6981 break;
6982 case X86_64_INTEGER_CLASS:
6983 case X86_64_INTEGERSI_CLASS:
6984 /* Merge TImodes on aligned occasions here too. */
6985 if (i * 8 + 8 > bytes)
6986 tmpmode
6987 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6988 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6989 tmpmode = SImode;
6990 else
6991 tmpmode = DImode;
6992 /* We've requested 24 bytes for which we
6993 don't have a mode.  Use DImode. */
6994 if (tmpmode == BLKmode)
6995 tmpmode = DImode;
6996 exp [nexps++]
6997 = gen_rtx_EXPR_LIST (VOIDmode,
6998 gen_rtx_REG (tmpmode, *intreg),
6999 GEN_INT (i*8));
7000 intreg++;
7001 break;
7002 case X86_64_SSESF_CLASS:
7003 exp [nexps++]
7004 = gen_rtx_EXPR_LIST (VOIDmode,
7005 gen_rtx_REG (SFmode,
7006 SSE_REGNO (sse_regno)),
7007 GEN_INT (i*8));
7008 sse_regno++;
7009 break;
7010 case X86_64_SSEDF_CLASS:
7011 exp [nexps++]
7012 = gen_rtx_EXPR_LIST (VOIDmode,
7013 gen_rtx_REG (DFmode,
7014 SSE_REGNO (sse_regno)),
7015 GEN_INT (i*8));
7016 sse_regno++;
7017 break;
7018 case X86_64_SSE_CLASS:
7019 pos = i;
7020 switch (n)
7022 case 1:
7023 tmpmode = DImode;
7024 break;
7025 case 2:
7026 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7028 tmpmode = TImode;
7029 i++;
7031 else
7032 tmpmode = DImode;
7033 break;
7034 case 4:
7035 gcc_assert (i == 0
7036 && regclass[1] == X86_64_SSEUP_CLASS
7037 && regclass[2] == X86_64_SSEUP_CLASS
7038 && regclass[3] == X86_64_SSEUP_CLASS);
7039 tmpmode = OImode;
7040 i += 3;
7041 break;
7042 case 8:
7043 gcc_assert (i == 0
7044 && regclass[1] == X86_64_SSEUP_CLASS
7045 && regclass[2] == X86_64_SSEUP_CLASS
7046 && regclass[3] == X86_64_SSEUP_CLASS
7047 && regclass[4] == X86_64_SSEUP_CLASS
7048 && regclass[5] == X86_64_SSEUP_CLASS
7049 && regclass[6] == X86_64_SSEUP_CLASS
7050 && regclass[7] == X86_64_SSEUP_CLASS);
7051 tmpmode = XImode;
7052 i += 7;
7053 break;
7054 default:
7055 gcc_unreachable ();
7057 exp [nexps++]
7058 = gen_rtx_EXPR_LIST (VOIDmode,
7059 gen_rtx_REG (tmpmode,
7060 SSE_REGNO (sse_regno)),
7061 GEN_INT (pos*8));
7062 sse_regno++;
7063 break;
7064 default:
7065 gcc_unreachable ();
7069 /* Empty aligned struct, union or class. */
7070 if (nexps == 0)
7071 return NULL;
7073 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7074 for (i = 0; i < nexps; i++)
7075 XVECEXP (ret, 0, i) = exp [i];
7076 return ret;
7079 /* Update the data in CUM to advance over an argument of mode MODE
7080 and data type TYPE. (TYPE is null for libcalls where that information
7081 may not be available.) */
7083 static void
7084 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7085 const_tree type, HOST_WIDE_INT bytes,
7086 HOST_WIDE_INT words)
7088 switch (mode)
7090 default:
7091 break;
7093 case BLKmode:
7094 if (bytes < 0)
7095 break;
7096 /* FALLTHRU */
7098 case DImode:
7099 case SImode:
7100 case HImode:
7101 case QImode:
7102 cum->words += words;
7103 cum->nregs -= words;
7104 cum->regno += words;
7106 if (cum->nregs <= 0)
7108 cum->nregs = 0;
7109 cum->regno = 0;
7111 break;
7113 case OImode:
7114 /* OImode shouldn't be used directly. */
7115 gcc_unreachable ();
7117 case DFmode:
7118 if (cum->float_in_sse < 2)
7119 break;
7120 case SFmode:
7121 if (cum->float_in_sse < 1)
7122 break;
7123 /* FALLTHRU */
7125 case V8SFmode:
7126 case V8SImode:
7127 case V64QImode:
7128 case V32HImode:
7129 case V16SImode:
7130 case V8DImode:
7131 case V16SFmode:
7132 case V8DFmode:
7133 case V32QImode:
7134 case V16HImode:
7135 case V4DFmode:
7136 case V4DImode:
7137 case TImode:
7138 case V16QImode:
7139 case V8HImode:
7140 case V4SImode:
7141 case V2DImode:
7142 case V4SFmode:
7143 case V2DFmode:
7144 if (!type || !AGGREGATE_TYPE_P (type))
7146 cum->sse_words += words;
7147 cum->sse_nregs -= 1;
7148 cum->sse_regno += 1;
7149 if (cum->sse_nregs <= 0)
7151 cum->sse_nregs = 0;
7152 cum->sse_regno = 0;
7155 break;
7157 case V8QImode:
7158 case V4HImode:
7159 case V2SImode:
7160 case V2SFmode:
7161 case V1TImode:
7162 case V1DImode:
7163 if (!type || !AGGREGATE_TYPE_P (type))
7165 cum->mmx_words += words;
7166 cum->mmx_nregs -= 1;
7167 cum->mmx_regno += 1;
7168 if (cum->mmx_nregs <= 0)
7170 cum->mmx_nregs = 0;
7171 cum->mmx_regno = 0;
7174 break;
7178 static void
7179 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7180 const_tree type, HOST_WIDE_INT words, bool named)
7182 int int_nregs, sse_nregs;
7184 /* Unnamed 512 and 256 bit vector mode parameters are passed on the stack. */
7185 if (!named && (VALID_AVX512F_REG_MODE (mode)
7186 || VALID_AVX256_REG_MODE (mode)))
7187 return;
7189 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7190 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7192 cum->nregs -= int_nregs;
7193 cum->sse_nregs -= sse_nregs;
7194 cum->regno += int_nregs;
7195 cum->sse_regno += sse_nregs;
7197 else
7199 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7200 cum->words = (cum->words + align - 1) & ~(align - 1);
7201 cum->words += words;
7205 static void
7206 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7207 HOST_WIDE_INT words)
7209 /* Otherwise, this should be passed indirectly. */
7210 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7212 cum->words += words;
7213 if (cum->nregs > 0)
7215 cum->nregs -= 1;
7216 cum->regno += 1;
7220 /* Update the data in CUM to advance over an argument of mode MODE and
7221 data type TYPE. (TYPE is null for libcalls where that information
7222 may not be available.) */
7224 static void
7225 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7226 const_tree type, bool named)
7228 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7229 HOST_WIDE_INT bytes, words;
7231 if (mode == BLKmode)
7232 bytes = int_size_in_bytes (type);
7233 else
7234 bytes = GET_MODE_SIZE (mode);
7235 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7237 if (type)
7238 mode = type_natural_mode (type, NULL, false);
7240 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7241 function_arg_advance_ms_64 (cum, bytes, words);
7242 else if (TARGET_64BIT)
7243 function_arg_advance_64 (cum, mode, type, words, named);
7244 else
7245 function_arg_advance_32 (cum, mode, type, bytes, words);
7248 /* Define where to put the arguments to a function.
7249 Value is zero to push the argument on the stack,
7250 or a hard register in which to store the argument.
7252 MODE is the argument's machine mode.
7253 TYPE is the data type of the argument (as a tree).
7254 This is null for libcalls where that information may
7255 not be available.
7256 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7257 the preceding args and about the function being called.
7258 NAMED is nonzero if this argument is a named parameter
7259 (otherwise it is an extra parameter matching an ellipsis). */
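/* As a 32-bit illustration: with -mregparm=3 the first integer-sized named
   arguments go in %eax, %edx and %ecx, while the fastcall convention
   handled below uses only %ecx and %edx and skips aggregates; anything
   that does not fit in the remaining registers falls back to the stack.  */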
7261 static rtx
7262 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7263 enum machine_mode orig_mode, const_tree type,
7264 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7266 /* Avoid the AL settings for the Unix64 ABI. */
7267 if (mode == VOIDmode)
7268 return constm1_rtx;
7270 switch (mode)
7272 default:
7273 break;
7275 case BLKmode:
7276 if (bytes < 0)
7277 break;
7278 /* FALLTHRU */
7279 case DImode:
7280 case SImode:
7281 case HImode:
7282 case QImode:
7283 if (words <= cum->nregs)
7285 int regno = cum->regno;
7287 /* Fastcall allocates the first two DWORD (SImode) or
7288 smaller arguments to ECX and EDX if the argument isn't an
7289 aggregate type. */
7290 if (cum->fastcall)
7292 if (mode == BLKmode
7293 || mode == DImode
7294 || (type && AGGREGATE_TYPE_P (type)))
7295 break;
7297 /* ECX, not EAX, is the first allocated register. */
7298 if (regno == AX_REG)
7299 regno = CX_REG;
7301 return gen_rtx_REG (mode, regno);
7303 break;
7305 case DFmode:
7306 if (cum->float_in_sse < 2)
7307 break;
7308 case SFmode:
7309 if (cum->float_in_sse < 1)
7310 break;
7311 /* FALLTHRU */
7312 case TImode:
7313 /* In 32bit, we pass TImode in xmm registers. */
7314 case V16QImode:
7315 case V8HImode:
7316 case V4SImode:
7317 case V2DImode:
7318 case V4SFmode:
7319 case V2DFmode:
7320 if (!type || !AGGREGATE_TYPE_P (type))
7322 if (cum->sse_nregs)
7323 return gen_reg_or_parallel (mode, orig_mode,
7324 cum->sse_regno + FIRST_SSE_REG);
7326 break;
7328 case OImode:
7329 case XImode:
7330 /* OImode and XImode shouldn't be used directly. */
7331 gcc_unreachable ();
7333 case V64QImode:
7334 case V32HImode:
7335 case V16SImode:
7336 case V8DImode:
7337 case V16SFmode:
7338 case V8DFmode:
7339 case V8SFmode:
7340 case V8SImode:
7341 case V32QImode:
7342 case V16HImode:
7343 case V4DFmode:
7344 case V4DImode:
7345 if (!type || !AGGREGATE_TYPE_P (type))
7347 if (cum->sse_nregs)
7348 return gen_reg_or_parallel (mode, orig_mode,
7349 cum->sse_regno + FIRST_SSE_REG);
7351 break;
7353 case V8QImode:
7354 case V4HImode:
7355 case V2SImode:
7356 case V2SFmode:
7357 case V1TImode:
7358 case V1DImode:
7359 if (!type || !AGGREGATE_TYPE_P (type))
7361 if (cum->mmx_nregs)
7362 return gen_reg_or_parallel (mode, orig_mode,
7363 cum->mmx_regno + FIRST_MMX_REG);
7365 break;
7368 return NULL_RTX;
7371 static rtx
7372 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7373 enum machine_mode orig_mode, const_tree type, bool named)
7375 /* Handle a hidden AL argument containing the number of SSE registers
7376 used by varargs x86-64 functions. */
7377 if (mode == VOIDmode)
7378 return GEN_INT (cum->maybe_vaarg
7379 ? (cum->sse_nregs < 0
7380 ? X86_64_SSE_REGPARM_MAX
7381 : cum->sse_regno)
7382 : -1);
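/* For a varargs call such as printf ("%f", x) this yields 1, since the
   double occupies one SSE register; the call expander uses the value to
   set %al before the call, and -1 means no such hint is needed.  */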
7384 switch (mode)
7386 default:
7387 break;
7389 case V8SFmode:
7390 case V8SImode:
7391 case V32QImode:
7392 case V16HImode:
7393 case V4DFmode:
7394 case V4DImode:
7395 case V16SFmode:
7396 case V16SImode:
7397 case V64QImode:
7398 case V32HImode:
7399 case V8DFmode:
7400 case V8DImode:
7401 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
7402 if (!named)
7403 return NULL;
7404 break;
7407 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7408 cum->sse_nregs,
7409 &x86_64_int_parameter_registers [cum->regno],
7410 cum->sse_regno);
7413 static rtx
7414 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7415 enum machine_mode orig_mode, bool named,
7416 HOST_WIDE_INT bytes)
7418 unsigned int regno;
7420 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7421 We use the value -2 to specify that the current function call is MSABI. */
7422 if (mode == VOIDmode)
7423 return GEN_INT (-2);
7425 /* If we've run out of registers, it goes on the stack. */
7426 if (cum->nregs == 0)
7427 return NULL_RTX;
7429 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7431 /* Only floating point modes are passed in anything but integer regs. */
7432 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7434 if (named)
7435 regno = cum->regno + FIRST_SSE_REG;
7436 else
7438 rtx t1, t2;
7440 /* Unnamed floating parameters are passed in both the
7441 SSE and integer registers. */
7442 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7443 t2 = gen_rtx_REG (mode, regno);
7444 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7445 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7446 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7449 /* Handle aggregate types passed in a register. */
7450 if (orig_mode == BLKmode)
7452 if (bytes > 0 && bytes <= 8)
7453 mode = (bytes > 4 ? DImode : SImode);
7454 if (mode == BLKmode)
7455 mode = DImode;
7458 return gen_reg_or_parallel (mode, orig_mode, regno);
7461 /* Return where to put the arguments to a function.
7462 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7464 MODE is the argument's machine mode. TYPE is the data type of the
7465 argument. It is null for libcalls where that information may not be
7466 available. CUM gives information about the preceding args and about
7467 the function being called. NAMED is nonzero if this argument is a
7468 named parameter (otherwise it is an extra parameter matching an
7469 ellipsis). */
7471 static rtx
7472 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7473 const_tree type, bool named)
7475 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7476 enum machine_mode mode = omode;
7477 HOST_WIDE_INT bytes, words;
7478 rtx arg;
7480 if (mode == BLKmode)
7481 bytes = int_size_in_bytes (type);
7482 else
7483 bytes = GET_MODE_SIZE (mode);
7484 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7486 /* To simplify the code below, represent vector types with a vector mode
7487 even if MMX/SSE are not active. */
7488 if (type && TREE_CODE (type) == VECTOR_TYPE)
7489 mode = type_natural_mode (type, cum, false);
7491 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7492 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7493 else if (TARGET_64BIT)
7494 arg = function_arg_64 (cum, mode, omode, type, named);
7495 else
7496 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7498 return arg;
7501 /* A C expression that indicates when an argument must be passed by
7502 reference. If nonzero for an argument, a copy of that argument is
7503 made in memory and a pointer to the argument is passed instead of
7504 the argument itself. The pointer is passed in whatever way is
7505 appropriate for passing a pointer to that type. */
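/* Under the MS ABI, for example, a 3 byte struct or a 16 byte __m128 is
   passed by reference, while an 8 byte struct travels by value in a
   register; the 64-bit SysV path below only forces a reference for
   variable sized types.  */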
7507 static bool
7508 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7509 const_tree type, bool named ATTRIBUTE_UNUSED)
7511 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7513 /* See Windows x64 Software Convention. */
7514 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7516 int msize = (int) GET_MODE_SIZE (mode);
7517 if (type)
7519 /* Arrays are passed by reference. */
7520 if (TREE_CODE (type) == ARRAY_TYPE)
7521 return true;
7523 if (AGGREGATE_TYPE_P (type))
7525 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7526 are passed by reference. */
7527 msize = int_size_in_bytes (type);
7531 /* __m128 is passed by reference. */
7532 switch (msize) {
7533 case 1: case 2: case 4: case 8:
7534 break;
7535 default:
7536 return true;
7539 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7540 return 1;
7542 return 0;
7545 /* Return true when TYPE should be 128bit aligned for 32bit argument
7546 passing ABI. XXX: This function is obsolete and is only used for
7547 checking psABI compatibility with previous versions of GCC. */
7549 static bool
7550 ix86_compat_aligned_value_p (const_tree type)
7552 enum machine_mode mode = TYPE_MODE (type);
7553 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7554 || mode == TDmode
7555 || mode == TFmode
7556 || mode == TCmode)
7557 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7558 return true;
7559 if (TYPE_ALIGN (type) < 128)
7560 return false;
7562 if (AGGREGATE_TYPE_P (type))
7564 /* Walk the aggregates recursively. */
7565 switch (TREE_CODE (type))
7567 case RECORD_TYPE:
7568 case UNION_TYPE:
7569 case QUAL_UNION_TYPE:
7571 tree field;
7573 /* Walk all the structure fields. */
7574 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7576 if (TREE_CODE (field) == FIELD_DECL
7577 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7578 return true;
7580 break;
7583 case ARRAY_TYPE:
7584 /* Just for use if some languages pass arrays by value. */
7585 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7586 return true;
7587 break;
7589 default:
7590 gcc_unreachable ();
7593 return false;
7596 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7597 XXX: This function is obsolete and is only used for checking psABI
7598 compatibility with previous versions of GCC. */
7600 static unsigned int
7601 ix86_compat_function_arg_boundary (enum machine_mode mode,
7602 const_tree type, unsigned int align)
7604 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7605 natural boundaries. */
7606 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7608 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7609 make an exception for SSE modes since these require 128bit
7610 alignment.
7612 The handling here differs from field_alignment. ICC aligns MMX
7613 arguments to 4 byte boundaries, while structure fields are aligned
7614 to 8 byte boundaries. */
7615 if (!type)
7617 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7618 align = PARM_BOUNDARY;
7620 else
7622 if (!ix86_compat_aligned_value_p (type))
7623 align = PARM_BOUNDARY;
7626 if (align > BIGGEST_ALIGNMENT)
7627 align = BIGGEST_ALIGNMENT;
7628 return align;
7631 /* Return true when TYPE should be 128bit aligned for 32bit argument
7632 passing ABI. */
7634 static bool
7635 ix86_contains_aligned_value_p (const_tree type)
7637 enum machine_mode mode = TYPE_MODE (type);
7639 if (mode == XFmode || mode == XCmode)
7640 return false;
7642 if (TYPE_ALIGN (type) < 128)
7643 return false;
7645 if (AGGREGATE_TYPE_P (type))
7647 /* Walk the aggregates recursively. */
7648 switch (TREE_CODE (type))
7650 case RECORD_TYPE:
7651 case UNION_TYPE:
7652 case QUAL_UNION_TYPE:
7654 tree field;
7656 /* Walk all the structure fields. */
7657 for (field = TYPE_FIELDS (type);
7658 field;
7659 field = DECL_CHAIN (field))
7661 if (TREE_CODE (field) == FIELD_DECL
7662 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7663 return true;
7665 break;
7668 case ARRAY_TYPE:
7669 /* Just for use if some languages pass arrays by value. */
7670 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7671 return true;
7672 break;
7674 default:
7675 gcc_unreachable ();
7678 else
7679 return TYPE_ALIGN (type) >= 128;
7681 return false;
7684 /* Gives the alignment boundary, in bits, of an argument with the
7685 specified mode and type. */
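/* For instance, on ia32 a double argument stays at the 32 bit
   PARM_BOUNDARY, while a 16 byte __m128 (or any type containing one) is
   aligned to 128 bits; the -Wpsabi note below fires when the result
   differs from the pre-GCC 4.6 behaviour computed by
   ix86_compat_function_arg_boundary.  */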
7687 static unsigned int
7688 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7690 unsigned int align;
7691 if (type)
7693 /* Since the main variant type is used for the call, convert TYPE
7694 to its main variant. */
7695 type = TYPE_MAIN_VARIANT (type);
7696 align = TYPE_ALIGN (type);
7698 else
7699 align = GET_MODE_ALIGNMENT (mode);
7700 if (align < PARM_BOUNDARY)
7701 align = PARM_BOUNDARY;
7702 else
7704 static bool warned;
7705 unsigned int saved_align = align;
7707 if (!TARGET_64BIT)
7709 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7710 if (!type)
7712 if (mode == XFmode || mode == XCmode)
7713 align = PARM_BOUNDARY;
7715 else if (!ix86_contains_aligned_value_p (type))
7716 align = PARM_BOUNDARY;
7718 if (align < 128)
7719 align = PARM_BOUNDARY;
7722 if (warn_psabi
7723 && !warned
7724 && align != ix86_compat_function_arg_boundary (mode, type,
7725 saved_align))
7727 warned = true;
7728 inform (input_location,
7729 "The ABI for passing parameters with %d-byte"
7730 " alignment has changed in GCC 4.6",
7731 align / BITS_PER_UNIT);
7735 return align;
7738 /* Return true if REGNO is a possible register number for a function value. */
7740 static bool
7741 ix86_function_value_regno_p (const unsigned int regno)
7743 switch (regno)
7745 case AX_REG:
7746 case DX_REG:
7747 return true;
7748 case DI_REG:
7749 case SI_REG:
7750 return TARGET_64BIT && ix86_abi != MS_ABI;
7752 /* Complex values are returned in %st(0)/%st(1) pair. */
7753 case ST0_REG:
7754 case ST1_REG:
7755 /* TODO: The function should depend on the current function's ABI, but
7756 builtins.c would then need updating.  Therefore we use the
7757 default ABI. */
7758 if (TARGET_64BIT && ix86_abi == MS_ABI)
7759 return false;
7760 return TARGET_FLOAT_RETURNS_IN_80387;
7762 /* Complex values are returned in %xmm0/%xmm1 pair. */
7763 case XMM0_REG:
7764 case XMM1_REG:
7765 return TARGET_SSE;
7767 case MM0_REG:
7768 if (TARGET_MACHO || TARGET_64BIT)
7769 return false;
7770 return TARGET_MMX;
7773 return false;
7776 /* Define how to find the value returned by a function.
7777 VALTYPE is the data type of the value (as a tree).
7778 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7779 otherwise, FUNC is 0. */
7781 static rtx
7782 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7783 const_tree fntype, const_tree fn)
7785 unsigned int regno;
7787 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7788 we normally prevent this case when mmx is not available. However
7789 some ABIs may require the result to be returned like DImode. */
7790 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7791 regno = FIRST_MMX_REG;
7793 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7794 we prevent this case when sse is not available. However some ABIs
7795 may require the result to be returned like integer TImode. */
7796 else if (mode == TImode
7797 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7798 regno = FIRST_SSE_REG;
7800 /* 32-byte vector modes in %ymm0. */
7801 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7802 regno = FIRST_SSE_REG;
7804 /* 64-byte vector modes in %zmm0. */
7805 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7806 regno = FIRST_SSE_REG;
7808 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7809 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7810 regno = FIRST_FLOAT_REG;
7811 else
7812 /* Most things go in %eax. */
7813 regno = AX_REG;
7815 /* Override FP return register with %xmm0 for local functions when
7816 SSE math is enabled or for functions with sseregparm attribute. */
7817 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7819 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7820 if ((sse_level >= 1 && mode == SFmode)
7821 || (sse_level == 2 && mode == DFmode))
7822 regno = FIRST_SSE_REG;
7825 /* OImode shouldn't be used directly. */
7826 gcc_assert (mode != OImode);
7828 return gen_rtx_REG (orig_mode, regno);
7831 static rtx
7832 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7833 const_tree valtype)
7835 rtx ret;
7837 /* Handle libcalls, which don't provide a type node. */
7838 if (valtype == NULL)
7840 unsigned int regno;
7842 switch (mode)
7844 case SFmode:
7845 case SCmode:
7846 case DFmode:
7847 case DCmode:
7848 case TFmode:
7849 case SDmode:
7850 case DDmode:
7851 case TDmode:
7852 regno = FIRST_SSE_REG;
7853 break;
7854 case XFmode:
7855 case XCmode:
7856 regno = FIRST_FLOAT_REG;
7857 break;
7858 case TCmode:
7859 return NULL;
7860 default:
7861 regno = AX_REG;
7864 return gen_rtx_REG (mode, regno);
7866 else if (POINTER_TYPE_P (valtype))
7868 /* Pointers are always returned in word_mode. */
7869 mode = word_mode;
7872 ret = construct_container (mode, orig_mode, valtype, 1,
7873 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7874 x86_64_int_return_registers, 0);
7876 /* For zero sized structures, construct_container returns NULL, but we
7877 need to keep the rest of the compiler happy by returning a meaningful value. */
7878 if (!ret)
7879 ret = gen_rtx_REG (orig_mode, AX_REG);
7881 return ret;
7884 static rtx
7885 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7886 const_tree valtype)
7888 unsigned int regno = AX_REG;
7890 if (TARGET_SSE)
7892 switch (GET_MODE_SIZE (mode))
7894 case 16:
7895 if (valtype != NULL_TREE
7896 && !VECTOR_INTEGER_TYPE_P (valtype)
7898 && !INTEGRAL_TYPE_P (valtype)
7899 && !VECTOR_FLOAT_TYPE_P (valtype))
7900 break;
7901 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7902 && !COMPLEX_MODE_P (mode))
7903 regno = FIRST_SSE_REG;
7904 break;
7905 case 8:
7906 case 4:
7907 if (mode == SFmode || mode == DFmode)
7908 regno = FIRST_SSE_REG;
7909 break;
7910 default:
7911 break;
7914 return gen_rtx_REG (orig_mode, regno);
7917 static rtx
7918 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7919 enum machine_mode orig_mode, enum machine_mode mode)
7921 const_tree fn, fntype;
7923 fn = NULL_TREE;
7924 if (fntype_or_decl && DECL_P (fntype_or_decl))
7925 fn = fntype_or_decl;
7926 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7928 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7929 return function_value_ms_64 (orig_mode, mode, valtype);
7930 else if (TARGET_64BIT)
7931 return function_value_64 (orig_mode, mode, valtype);
7932 else
7933 return function_value_32 (orig_mode, mode, fntype, fn);
7936 static rtx
7937 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7938 bool outgoing ATTRIBUTE_UNUSED)
7940 enum machine_mode mode, orig_mode;
7942 orig_mode = TYPE_MODE (valtype);
7943 mode = type_natural_mode (valtype, NULL, true);
7944 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7947 /* Pointer function arguments and return values are promoted to
7948 word_mode. */
7950 static enum machine_mode
7951 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7952 int *punsignedp, const_tree fntype,
7953 int for_return)
7955 if (type != NULL_TREE && POINTER_TYPE_P (type))
7957 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7958 return word_mode;
7960 return default_promote_function_mode (type, mode, punsignedp, fntype,
7961 for_return);
7964 /* Return true if a structure, union or array with MODE containing FIELD
7965 should be accessed using BLKmode. */
7967 static bool
7968 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7970 /* Union with XFmode must be in BLKmode. */
7971 return (mode == XFmode
7972 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7973 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7976 static rtx
7977 ix86_libcall_value (enum machine_mode mode)
7979 return ix86_function_value_1 (NULL, NULL, mode, mode);
7982 /* Return true iff type is returned in memory. */
7984 static bool ATTRIBUTE_UNUSED
7985 return_in_memory_32 (const_tree type, enum machine_mode mode)
7987 HOST_WIDE_INT size;
7989 if (mode == BLKmode)
7990 return true;
7992 size = int_size_in_bytes (type);
7994 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7995 return false;
7997 if (VECTOR_MODE_P (mode) || mode == TImode)
7999 /* User-created vectors small enough to fit in EAX. */
8000 if (size < 8)
8001 return false;
8003 /* MMX/3dNow values are returned in MM0,
8004 except when it doesn't exist or the ABI prescribes otherwise. */
8005 if (size == 8)
8006 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8008 /* SSE values are returned in XMM0, except when it doesn't exist. */
8009 if (size == 16)
8010 return !TARGET_SSE;
8012 /* AVX values are returned in YMM0, except when it doesn't exist. */
8013 if (size == 32)
8014 return !TARGET_AVX;
8016 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8017 if (size == 64)
8018 return !TARGET_AVX512F;
8021 if (mode == XFmode)
8022 return false;
8024 if (size > 12)
8025 return true;
8027 /* OImode shouldn't be used directly. */
8028 gcc_assert (mode != OImode);
8030 return false;
8033 static bool ATTRIBUTE_UNUSED
8034 return_in_memory_64 (const_tree type, enum machine_mode mode)
8036 int needed_intregs, needed_sseregs;
8037 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8040 static bool ATTRIBUTE_UNUSED
8041 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8043 HOST_WIDE_INT size = int_size_in_bytes (type);
8045 /* __m128 is returned in xmm0. */
8046 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8047 || VECTOR_FLOAT_TYPE_P (type))
8048 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8049 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8050 return false;
8052 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8053 return size != 1 && size != 2 && size != 4 && size != 8;
8056 static bool
8057 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8059 #ifdef SUBTARGET_RETURN_IN_MEMORY
8060 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8061 #else
8062 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8064 if (TARGET_64BIT)
8066 if (ix86_function_type_abi (fntype) == MS_ABI)
8067 return return_in_memory_ms_64 (type, mode);
8068 else
8069 return return_in_memory_64 (type, mode);
8071 else
8072 return return_in_memory_32 (type, mode);
8073 #endif
8077 /* Create the va_list data type. */
8079 /* Returns the calling convention specific va_list data type.
8080 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
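/* Rough C equivalent of the record built below for the SYSV_ABI case:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   For i386 and for MS_ABI the va_list is just a plain char pointer.  */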
8082 static tree
8083 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8085 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8087 /* For i386 we use plain pointer to argument area. */
8088 if (!TARGET_64BIT || abi == MS_ABI)
8089 return build_pointer_type (char_type_node);
8091 record = lang_hooks.types.make_type (RECORD_TYPE);
8092 type_decl = build_decl (BUILTINS_LOCATION,
8093 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8095 f_gpr = build_decl (BUILTINS_LOCATION,
8096 FIELD_DECL, get_identifier ("gp_offset"),
8097 unsigned_type_node);
8098 f_fpr = build_decl (BUILTINS_LOCATION,
8099 FIELD_DECL, get_identifier ("fp_offset"),
8100 unsigned_type_node);
8101 f_ovf = build_decl (BUILTINS_LOCATION,
8102 FIELD_DECL, get_identifier ("overflow_arg_area"),
8103 ptr_type_node);
8104 f_sav = build_decl (BUILTINS_LOCATION,
8105 FIELD_DECL, get_identifier ("reg_save_area"),
8106 ptr_type_node);
8108 va_list_gpr_counter_field = f_gpr;
8109 va_list_fpr_counter_field = f_fpr;
8111 DECL_FIELD_CONTEXT (f_gpr) = record;
8112 DECL_FIELD_CONTEXT (f_fpr) = record;
8113 DECL_FIELD_CONTEXT (f_ovf) = record;
8114 DECL_FIELD_CONTEXT (f_sav) = record;
8116 TYPE_STUB_DECL (record) = type_decl;
8117 TYPE_NAME (record) = type_decl;
8118 TYPE_FIELDS (record) = f_gpr;
8119 DECL_CHAIN (f_gpr) = f_fpr;
8120 DECL_CHAIN (f_fpr) = f_ovf;
8121 DECL_CHAIN (f_ovf) = f_sav;
8123 layout_type (record);
8125 /* The correct type is an array type of one element. */
8126 return build_array_type (record, build_index_type (size_zero_node));
8129 /* Set up the builtin va_list data type and, for 64-bit, the additional
8130 calling convention specific va_list data types. */
8132 static tree
8133 ix86_build_builtin_va_list (void)
8135 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8137 /* Initialize abi specific va_list builtin types. */
8138 if (TARGET_64BIT)
8140 tree t;
8141 if (ix86_abi == MS_ABI)
8143 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8144 if (TREE_CODE (t) != RECORD_TYPE)
8145 t = build_variant_type_copy (t);
8146 sysv_va_list_type_node = t;
8148 else
8150 t = ret;
8151 if (TREE_CODE (t) != RECORD_TYPE)
8152 t = build_variant_type_copy (t);
8153 sysv_va_list_type_node = t;
8155 if (ix86_abi != MS_ABI)
8157 t = ix86_build_builtin_va_list_abi (MS_ABI);
8158 if (TREE_CODE (t) != RECORD_TYPE)
8159 t = build_variant_type_copy (t);
8160 ms_va_list_type_node = t;
8162 else
8164 t = ret;
8165 if (TREE_CODE (t) != RECORD_TYPE)
8166 t = build_variant_type_copy (t);
8167 ms_va_list_type_node = t;
8171 return ret;
8174 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8176 static void
8177 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8179 rtx save_area, mem;
8180 alias_set_type set;
8181 int i, max;
8183 /* GPR size of varargs save area. */
8184 if (cfun->va_list_gpr_size)
8185 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8186 else
8187 ix86_varargs_gpr_size = 0;
8189 /* FPR size of varargs save area. We don't need it if we don't pass
8190 anything in SSE registers. */
8191 if (TARGET_SSE && cfun->va_list_fpr_size)
8192 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8193 else
8194 ix86_varargs_fpr_size = 0;
8196 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8197 return;
8199 save_area = frame_pointer_rtx;
8200 set = get_varargs_alias_set ();
8202 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8203 if (max > X86_64_REGPARM_MAX)
8204 max = X86_64_REGPARM_MAX;
8206 for (i = cum->regno; i < max; i++)
8208 mem = gen_rtx_MEM (word_mode,
8209 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8210 MEM_NOTRAP_P (mem) = 1;
8211 set_mem_alias_set (mem, set);
8212 emit_move_insn (mem,
8213 gen_rtx_REG (word_mode,
8214 x86_64_int_parameter_registers[i]));
8217 if (ix86_varargs_fpr_size)
8219 enum machine_mode smode;
8220 rtx label, test;
8222 /* Now emit code to save SSE registers.  The AX parameter contains the
8223 number of SSE parameter registers used to call this function, though all
8224 we actually check here is the zero/non-zero status. */
8226 label = gen_label_rtx ();
8227 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8228 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8229 label));
8231 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8232 we used movdqa (i.e. TImode) instead? Perhaps even better would
8233 be if we could determine the real mode of the data, via a hook
8234 into pass_stdarg. Ignore all that for now. */
8235 smode = V4SFmode;
8236 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8237 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8239 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8240 if (max > X86_64_SSE_REGPARM_MAX)
8241 max = X86_64_SSE_REGPARM_MAX;
8243 for (i = cum->sse_regno; i < max; ++i)
8245 mem = plus_constant (Pmode, save_area,
8246 i * 16 + ix86_varargs_gpr_size);
8247 mem = gen_rtx_MEM (smode, mem);
8248 MEM_NOTRAP_P (mem) = 1;
8249 set_mem_alias_set (mem, set);
8250 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8252 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8255 emit_label (label);
8259 static void
8260 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8262 alias_set_type set = get_varargs_alias_set ();
8263 int i;
8265 /* Reset to zero, as there might be a sysv vaarg used
8266 before. */
8267 ix86_varargs_gpr_size = 0;
8268 ix86_varargs_fpr_size = 0;
8270 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8272 rtx reg, mem;
8274 mem = gen_rtx_MEM (Pmode,
8275 plus_constant (Pmode, virtual_incoming_args_rtx,
8276 i * UNITS_PER_WORD));
8277 MEM_NOTRAP_P (mem) = 1;
8278 set_mem_alias_set (mem, set);
8280 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8281 emit_move_insn (mem, reg);
8285 static void
8286 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8287 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8288 int no_rtl)
8290 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8291 CUMULATIVE_ARGS next_cum;
8292 tree fntype;
8294 /* This argument doesn't appear to be used anymore. Which is good,
8295 because the old code here didn't suppress rtl generation. */
8296 gcc_assert (!no_rtl);
8298 if (!TARGET_64BIT)
8299 return;
8301 fntype = TREE_TYPE (current_function_decl);
8303 /* For varargs, we do not want to skip the dummy va_dcl argument.
8304 For stdargs, we do want to skip the last named argument. */
8305 next_cum = *cum;
8306 if (stdarg_p (fntype))
8307 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8308 true);
8310 if (cum->call_abi == MS_ABI)
8311 setup_incoming_varargs_ms_64 (&next_cum);
8312 else
8313 setup_incoming_varargs_64 (&next_cum);
8316 /* Checks if TYPE is of kind va_list char *. */
8318 static bool
8319 is_va_list_char_pointer (tree type)
8321 tree canonic;
8323 /* For 32-bit it is always true. */
8324 if (!TARGET_64BIT)
8325 return true;
8326 canonic = ix86_canonical_va_list_type (type);
8327 return (canonic == ms_va_list_type_node
8328 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8331 /* Implement va_start. */
8333 static void
8334 ix86_va_start (tree valist, rtx nextarg)
8336 HOST_WIDE_INT words, n_gpr, n_fpr;
8337 tree f_gpr, f_fpr, f_ovf, f_sav;
8338 tree gpr, fpr, ovf, sav, t;
8339 tree type;
8340 rtx ovf_rtx;
8342 if (flag_split_stack
8343 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8345 unsigned int scratch_regno;
8347 /* When we are splitting the stack, we can't refer to the stack
8348 arguments using internal_arg_pointer, because they may be on
8349 the old stack. The split stack prologue will arrange to
8350 leave a pointer to the old stack arguments in a scratch
8351 register, which we here copy to a pseudo-register. The split
8352 stack prologue can't set the pseudo-register directly because
8353 it (the prologue) runs before any registers have been saved. */
8355 scratch_regno = split_stack_prologue_scratch_regno ();
8356 if (scratch_regno != INVALID_REGNUM)
8358 rtx reg, seq;
8360 reg = gen_reg_rtx (Pmode);
8361 cfun->machine->split_stack_varargs_pointer = reg;
8363 start_sequence ();
8364 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8365 seq = get_insns ();
8366 end_sequence ();
8368 push_topmost_sequence ();
8369 emit_insn_after (seq, entry_of_function ());
8370 pop_topmost_sequence ();
8374 /* Only 64bit target needs something special. */
8375 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8377 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8378 std_expand_builtin_va_start (valist, nextarg);
8379 else
8381 rtx va_r, next;
8383 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8384 next = expand_binop (ptr_mode, add_optab,
8385 cfun->machine->split_stack_varargs_pointer,
8386 crtl->args.arg_offset_rtx,
8387 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8388 convert_move (va_r, next, 0);
8390 return;
8393 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8394 f_fpr = DECL_CHAIN (f_gpr);
8395 f_ovf = DECL_CHAIN (f_fpr);
8396 f_sav = DECL_CHAIN (f_ovf);
8398 valist = build_simple_mem_ref (valist);
8399 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8400 /* The following should be folded into the MEM_REF offset. */
8401 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8402 f_gpr, NULL_TREE);
8403 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8404 f_fpr, NULL_TREE);
8405 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8406 f_ovf, NULL_TREE);
8407 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8408 f_sav, NULL_TREE);
8410 /* Count number of gp and fp argument registers used. */
8411 words = crtl->args.info.words;
8412 n_gpr = crtl->args.info.regno;
8413 n_fpr = crtl->args.info.sse_regno;
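/* The register save area laid out by setup_incoming_varargs_64 holds the
   six GP argument registers first (8 bytes each, so gp_offset runs from 0
   to 48) followed by the SSE registers (16 bytes each), which is why
   fp_offset below starts at 8 * X86_64_REGPARM_MAX.  */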
8415 if (cfun->va_list_gpr_size)
8417 type = TREE_TYPE (gpr);
8418 t = build2 (MODIFY_EXPR, type,
8419 gpr, build_int_cst (type, n_gpr * 8));
8420 TREE_SIDE_EFFECTS (t) = 1;
8421 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8424 if (TARGET_SSE && cfun->va_list_fpr_size)
8426 type = TREE_TYPE (fpr);
8427 t = build2 (MODIFY_EXPR, type, fpr,
8428 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8429 TREE_SIDE_EFFECTS (t) = 1;
8430 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8433 /* Find the overflow area. */
8434 type = TREE_TYPE (ovf);
8435 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8436 ovf_rtx = crtl->args.internal_arg_pointer;
8437 else
8438 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8439 t = make_tree (type, ovf_rtx);
8440 if (words != 0)
8441 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8442 t = build2 (MODIFY_EXPR, type, ovf, t);
8443 TREE_SIDE_EFFECTS (t) = 1;
8444 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8446 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8448 /* Find the register save area.
8449 The function prologue saves it right above the stack frame. */
8450 type = TREE_TYPE (sav);
8451 t = make_tree (type, frame_pointer_rtx);
8452 if (!ix86_varargs_gpr_size)
8453 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8454 t = build2 (MODIFY_EXPR, type, sav, t);
8455 TREE_SIDE_EFFECTS (t) = 1;
8456 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8460 /* Implement va_arg. */
8462 static tree
8463 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8464 gimple_seq *post_p)
8466 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8467 tree f_gpr, f_fpr, f_ovf, f_sav;
8468 tree gpr, fpr, ovf, sav, t;
8469 int size, rsize;
8470 tree lab_false, lab_over = NULL_TREE;
8471 tree addr, t2;
8472 rtx container;
8473 int indirect_p = 0;
8474 tree ptrtype;
8475 enum machine_mode nat_mode;
8476 unsigned int arg_boundary;
8478 /* Only 64bit target needs something special. */
8479 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8480 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8482 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8483 f_fpr = DECL_CHAIN (f_gpr);
8484 f_ovf = DECL_CHAIN (f_fpr);
8485 f_sav = DECL_CHAIN (f_ovf);
8487 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8488 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8489 valist = build_va_arg_indirect_ref (valist);
8490 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8491 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8492 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8494 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8495 if (indirect_p)
8496 type = build_pointer_type (type);
8497 size = int_size_in_bytes (type);
8498 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8500 nat_mode = type_natural_mode (type, NULL, false);
8501 switch (nat_mode)
8503 case V8SFmode:
8504 case V8SImode:
8505 case V32QImode:
8506 case V16HImode:
8507 case V4DFmode:
8508 case V4DImode:
8509 case V16SFmode:
8510 case V16SImode:
8511 case V64QImode:
8512 case V32HImode:
8513 case V8DFmode:
8514 case V8DImode:
8515 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
8516 if (!TARGET_64BIT_MS_ABI)
8518 container = NULL;
8519 break;
8522 default:
8523 container = construct_container (nat_mode, TYPE_MODE (type),
8524 type, 0, X86_64_REGPARM_MAX,
8525 X86_64_SSE_REGPARM_MAX, intreg,
8527 break;
8530 /* Pull the value out of the saved registers. */
8532 addr = create_tmp_var (ptr_type_node, "addr");
8534 if (container)
8536 int needed_intregs, needed_sseregs;
8537 bool need_temp;
8538 tree int_addr, sse_addr;
8540 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8541 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8543 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8545 need_temp = (!REG_P (container)
8546 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8547 || TYPE_ALIGN (type) > 128));
8549 /* In case we are passing a structure, verify that it is a consecutive block
8550 in the register save area.  If not, we need to do moves. */
8551 if (!need_temp && !REG_P (container))
8553 /* Verify that all registers are strictly consecutive.  */
8554 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8556 int i;
8558 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8560 rtx slot = XVECEXP (container, 0, i);
8561 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8562 || INTVAL (XEXP (slot, 1)) != i * 16)
8563 need_temp = 1;
8566 else
8568 int i;
8570 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8572 rtx slot = XVECEXP (container, 0, i);
8573 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8574 || INTVAL (XEXP (slot, 1)) != i * 8)
8575 need_temp = 1;
8579 if (!need_temp)
8581 int_addr = addr;
8582 sse_addr = addr;
8584 else
8586 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8587 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8590 /* First ensure that we fit completely in registers. */
8591 if (needed_intregs)
8593 t = build_int_cst (TREE_TYPE (gpr),
8594 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8595 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8596 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8597 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8598 gimplify_and_add (t, pre_p);
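/* For example, with needed_intregs == 2 the branch to lab_false is taken once
   GPR >= (6 - 2 + 1) * 8 == 40, i.e. when fewer than two 8-byte slots remain
   in the integer part of the register save area. */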
8600 if (needed_sseregs)
8602 t = build_int_cst (TREE_TYPE (fpr),
8603 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8604 + X86_64_REGPARM_MAX * 8);
8605 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8606 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8607 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8608 gimplify_and_add (t, pre_p);
8611 /* Compute index to start of area used for integer regs. */
8612 if (needed_intregs)
8614 /* int_addr = gpr + sav; */
8615 t = fold_build_pointer_plus (sav, gpr);
8616 gimplify_assign (int_addr, t, pre_p);
8618 if (needed_sseregs)
8620 /* sse_addr = fpr + sav; */
8621 t = fold_build_pointer_plus (sav, fpr);
8622 gimplify_assign (sse_addr, t, pre_p);
8624 if (need_temp)
8626 int i, prev_size = 0;
8627 tree temp = create_tmp_var (type, "va_arg_tmp");
8629 /* addr = &temp; */
8630 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8631 gimplify_assign (addr, t, pre_p);
8633 for (i = 0; i < XVECLEN (container, 0); i++)
8635 rtx slot = XVECEXP (container, 0, i);
8636 rtx reg = XEXP (slot, 0);
8637 enum machine_mode mode = GET_MODE (reg);
8638 tree piece_type;
8639 tree addr_type;
8640 tree daddr_type;
8641 tree src_addr, src;
8642 int src_offset;
8643 tree dest_addr, dest;
8644 int cur_size = GET_MODE_SIZE (mode);
8646 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8647 prev_size = INTVAL (XEXP (slot, 1));
8648 if (prev_size + cur_size > size)
8650 cur_size = size - prev_size;
8651 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8652 if (mode == BLKmode)
8653 mode = QImode;
8655 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8656 if (mode == GET_MODE (reg))
8657 addr_type = build_pointer_type (piece_type);
8658 else
8659 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8660 true);
8661 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8662 true);
8664 if (SSE_REGNO_P (REGNO (reg)))
8666 src_addr = sse_addr;
8667 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8669 else
8671 src_addr = int_addr;
8672 src_offset = REGNO (reg) * 8;
8674 src_addr = fold_convert (addr_type, src_addr);
8675 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8677 dest_addr = fold_convert (daddr_type, addr);
8678 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8679 if (cur_size == GET_MODE_SIZE (mode))
8681 src = build_va_arg_indirect_ref (src_addr);
8682 dest = build_va_arg_indirect_ref (dest_addr);
8684 gimplify_assign (dest, src, pre_p);
8686 else
8688 tree copy
8689 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8690 3, dest_addr, src_addr,
8691 size_int (cur_size));
8692 gimplify_and_add (copy, pre_p);
8694 prev_size += cur_size;
8698 if (needed_intregs)
8700 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8701 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8702 gimplify_assign (gpr, t, pre_p);
8705 if (needed_sseregs)
8707 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8708 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8709 gimplify_assign (fpr, t, pre_p);
8712 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8714 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8717 /* ... otherwise out of the overflow area. */
8719 /* When the caller aligns a parameter on the stack, a parameter whose
8720 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT is only
8721 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8722 with the caller. */
8723 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8724 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8725 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8727 /* Care for on-stack alignment if needed. */
8728 if (arg_boundary <= 64 || size == 0)
8729 t = ovf;
8730 else
8732 HOST_WIDE_INT align = arg_boundary / 8;
8733 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8734 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8735 build_int_cst (TREE_TYPE (t), -align));
8738 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8739 gimplify_assign (addr, t, pre_p);
8741 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8742 gimplify_assign (unshare_expr (ovf), t, pre_p);
8744 if (container)
8745 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8747 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8748 addr = fold_convert (ptrtype, addr);
8750 if (indirect_p)
8751 addr = build_va_arg_indirect_ref (addr);
8752 return build_va_arg_indirect_ref (addr);
8755 /* Return true if OPNUM's MEM should be matched
8756 in movabs* patterns. */
8758 bool
8759 ix86_check_movabs (rtx insn, int opnum)
8761 rtx set, mem;
8763 set = PATTERN (insn);
8764 if (GET_CODE (set) == PARALLEL)
8765 set = XVECEXP (set, 0, 0);
8766 gcc_assert (GET_CODE (set) == SET);
8767 mem = XEXP (set, opnum);
8768 while (GET_CODE (mem) == SUBREG)
8769 mem = SUBREG_REG (mem);
8770 gcc_assert (MEM_P (mem));
8771 return volatile_ok || !MEM_VOLATILE_P (mem);
8774 /* Initialize the table of extra 80387 mathematical constants. */
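/* These are the values loaded by the x87 fldlg2 (log10 2), fldln2 (ln 2),
   fldl2e (log2 e), fldl2t (log2 10) and fldpi (pi) instructions; the decimal
   strings below are converted and rounded to XFmode precision. */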
8776 static void
8777 init_ext_80387_constants (void)
8779 static const char * cst[5] =
8781 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8782 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8783 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8784 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8785 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8787 int i;
8789 for (i = 0; i < 5; i++)
8791 real_from_string (&ext_80387_constants_table[i], cst[i]);
8792 /* Ensure each constant is rounded to XFmode precision. */
8793 real_convert (&ext_80387_constants_table[i],
8794 XFmode, &ext_80387_constants_table[i]);
8797 ext_80387_constants_init = 1;
8800 /* Return non-zero if the constant is something that
8801 can be loaded with a special instruction. */
8804 standard_80387_constant_p (rtx x)
8806 enum machine_mode mode = GET_MODE (x);
8808 REAL_VALUE_TYPE r;
8810 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8811 return -1;
8813 if (x == CONST0_RTX (mode))
8814 return 1;
8815 if (x == CONST1_RTX (mode))
8816 return 2;
8818 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8820 /* For XFmode constants, try to find a special 80387 instruction when
8821 optimizing for size or on those CPUs that benefit from them. */
8822 if (mode == XFmode
8823 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8825 int i;
8827 if (! ext_80387_constants_init)
8828 init_ext_80387_constants ();
8830 for (i = 0; i < 5; i++)
8831 if (real_identical (&r, &ext_80387_constants_table[i]))
8832 return i + 3;
8835 /* A load of the constant -0.0 or -1.0 will be split into an
8836 fldz;fchs or fld1;fchs sequence. */
8837 if (real_isnegzero (&r))
8838 return 8;
8839 if (real_identical (&r, &dconstm1))
8840 return 9;
8842 return 0;
8845 /* Return the opcode of the special instruction to be used to load
8846 the constant X. */
8848 const char *
8849 standard_80387_constant_opcode (rtx x)
8851 switch (standard_80387_constant_p (x))
8853 case 1:
8854 return "fldz";
8855 case 2:
8856 return "fld1";
8857 case 3:
8858 return "fldlg2";
8859 case 4:
8860 return "fldln2";
8861 case 5:
8862 return "fldl2e";
8863 case 6:
8864 return "fldl2t";
8865 case 7:
8866 return "fldpi";
8867 case 8:
8868 case 9:
8869 return "#";
8870 default:
8871 gcc_unreachable ();
8875 /* Return the CONST_DOUBLE representing the 80387 constant that is
8876 loaded by the specified special instruction. The argument IDX
8877 matches the return value from standard_80387_constant_p. */
8880 standard_80387_constant_rtx (int idx)
8882 int i;
8884 if (! ext_80387_constants_init)
8885 init_ext_80387_constants ();
8887 switch (idx)
8889 case 3:
8890 case 4:
8891 case 5:
8892 case 6:
8893 case 7:
8894 i = idx - 3;
8895 break;
8897 default:
8898 gcc_unreachable ();
8901 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8902 XFmode);
8905 /* Return 1 if X is all 0s and 2 if X is all 1s
8906 in a supported SSE/AVX vector mode. */
8909 standard_sse_constant_p (rtx x)
8911 enum machine_mode mode = GET_MODE (x);
8913 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8914 return 1;
8915 if (vector_all_ones_operand (x, mode))
8916 switch (mode)
8918 case V16QImode:
8919 case V8HImode:
8920 case V4SImode:
8921 case V2DImode:
8922 if (TARGET_SSE2)
8923 return 2;
8924 case V32QImode:
8925 case V16HImode:
8926 case V8SImode:
8927 case V4DImode:
8928 if (TARGET_AVX2)
8929 return 2;
8930 case V64QImode:
8931 case V32HImode:
8932 case V16SImode:
8933 case V8DImode:
8934 if (TARGET_AVX512F)
8935 return 2;
8936 default:
8937 break;
8940 return 0;
8943 /* Return the opcode of the special instruction to be used to load
8944 the constant X. */
8946 const char *
8947 standard_sse_constant_opcode (rtx insn, rtx x)
8949 switch (standard_sse_constant_p (x))
8951 case 1:
8952 switch (get_attr_mode (insn))
8954 case MODE_XI:
8955 case MODE_V16SF:
8956 return "vpxord\t%g0, %g0, %g0";
8957 case MODE_V8DF:
8958 return "vpxorq\t%g0, %g0, %g0";
8959 case MODE_TI:
8960 return "%vpxor\t%0, %d0";
8961 case MODE_V2DF:
8962 return "%vxorpd\t%0, %d0";
8963 case MODE_V4SF:
8964 return "%vxorps\t%0, %d0";
8966 case MODE_OI:
8967 return "vpxor\t%x0, %x0, %x0";
8968 case MODE_V4DF:
8969 return "vxorpd\t%x0, %x0, %x0";
8970 case MODE_V8SF:
8971 return "vxorps\t%x0, %x0, %x0";
8973 default:
8974 break;
8977 case 2:
8978 if (get_attr_mode (insn) == MODE_XI
8979 || get_attr_mode (insn) == MODE_V8DF
8980 || get_attr_mode (insn) == MODE_V16SF)
8981 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8982 if (TARGET_AVX)
8983 return "vpcmpeqd\t%0, %0, %0";
8984 else
8985 return "pcmpeqd\t%0, %0";
8987 default:
8988 break;
8990 gcc_unreachable ();
8993 /* Returns true if OP contains a symbol reference. */
8995 bool
8996 symbolic_reference_mentioned_p (rtx op)
8998 const char *fmt;
8999 int i;
9001 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9002 return true;
9004 fmt = GET_RTX_FORMAT (GET_CODE (op));
9005 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9007 if (fmt[i] == 'E')
9009 int j;
9011 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9012 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9013 return true;
9016 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9017 return true;
9020 return false;
9023 /* Return true if it is appropriate to emit `ret' instructions in the
9024 body of a function. Do this only if the epilogue is simple, needing a
9025 couple of insns. Prior to reloading, we can't tell how many registers
9026 must be saved, so return false then. Return false if there is no frame
9027 marker to de-allocate. */
9029 bool
9030 ix86_can_use_return_insn_p (void)
9032 struct ix86_frame frame;
9034 if (! reload_completed || frame_pointer_needed)
9035 return 0;
9037 /* Don't allow a pop of more than 32k, since that's all we can do
9038 with one instruction. */
9039 if (crtl->args.pops_args && crtl->args.size >= 32768)
9040 return 0;
9042 ix86_compute_frame_layout (&frame);
9043 return (frame.stack_pointer_offset == UNITS_PER_WORD
9044 && (frame.nregs + frame.nsseregs) == 0);
9047 /* Value should be nonzero if functions must have frame pointers.
9048 Zero means the frame pointer need not be set up (and parms may
9049 be accessed via the stack pointer) in functions that seem suitable. */
9051 static bool
9052 ix86_frame_pointer_required (void)
9054 /* If we accessed previous frames, then the generated code expects
9055 to be able to access the saved ebp value in our frame. */
9056 if (cfun->machine->accesses_prev_frame)
9057 return true;
9059 /* Several x86 OSes need a frame pointer for other reasons,
9060 usually pertaining to setjmp. */
9061 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9062 return true;
9064 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9065 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9066 return true;
9068 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9069 stack allocation is 4GB. */
9070 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9071 return true;
9073 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9074 turns off the frame pointer by default. Turn it back on now if
9075 we've not got a leaf function. */
9076 if (TARGET_OMIT_LEAF_FRAME_POINTER
9077 && (!crtl->is_leaf
9078 || ix86_current_function_calls_tls_descriptor))
9079 return true;
9081 if (crtl->profile && !flag_fentry)
9082 return true;
9084 return false;
9087 /* Record that the current function accesses previous call frames. */
9089 void
9090 ix86_setup_frame_addresses (void)
9092 cfun->machine->accesses_prev_frame = 1;
9095 #ifndef USE_HIDDEN_LINKONCE
9096 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9097 # define USE_HIDDEN_LINKONCE 1
9098 # else
9099 # define USE_HIDDEN_LINKONCE 0
9100 # endif
9101 #endif
9103 static int pic_labels_used;
9105 /* Fills in the label name that should be used for a pc thunk for
9106 the given register. */
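/* For example, with the hidden linkonce scheme the thunk for %ebx is named
   "__x86.get_pc_thunk.bx". */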
9108 static void
9109 get_pc_thunk_name (char name[32], unsigned int regno)
9111 gcc_assert (!TARGET_64BIT);
9113 if (USE_HIDDEN_LINKONCE)
9114 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9115 else
9116 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9120 /* This function generates, for -fpic, the pc thunks that load a register
9121 with the return address of the caller and then return. */
9123 static void
9124 ix86_code_end (void)
9126 rtx xops[2];
9127 int regno;
9129 for (regno = AX_REG; regno <= SP_REG; regno++)
9131 char name[32];
9132 tree decl;
9134 if (!(pic_labels_used & (1 << regno)))
9135 continue;
9137 get_pc_thunk_name (name, regno);
9139 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9140 get_identifier (name),
9141 build_function_type_list (void_type_node, NULL_TREE));
9142 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9143 NULL_TREE, void_type_node);
9144 TREE_PUBLIC (decl) = 1;
9145 TREE_STATIC (decl) = 1;
9146 DECL_IGNORED_P (decl) = 1;
9148 #if TARGET_MACHO
9149 if (TARGET_MACHO)
9151 switch_to_section (darwin_sections[text_coal_section]);
9152 fputs ("\t.weak_definition\t", asm_out_file);
9153 assemble_name (asm_out_file, name);
9154 fputs ("\n\t.private_extern\t", asm_out_file);
9155 assemble_name (asm_out_file, name);
9156 putc ('\n', asm_out_file);
9157 ASM_OUTPUT_LABEL (asm_out_file, name);
9158 DECL_WEAK (decl) = 1;
9160 else
9161 #endif
9162 if (USE_HIDDEN_LINKONCE)
9164 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9166 targetm.asm_out.unique_section (decl, 0);
9167 switch_to_section (get_named_section (decl, NULL, 0));
9169 targetm.asm_out.globalize_label (asm_out_file, name);
9170 fputs ("\t.hidden\t", asm_out_file);
9171 assemble_name (asm_out_file, name);
9172 putc ('\n', asm_out_file);
9173 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9175 else
9177 switch_to_section (text_section);
9178 ASM_OUTPUT_LABEL (asm_out_file, name);
9181 DECL_INITIAL (decl) = make_node (BLOCK);
9182 current_function_decl = decl;
9183 init_function_start (decl);
9184 first_function_block_is_cold = false;
9185 /* Make sure unwind info is emitted for the thunk if needed. */
9186 final_start_function (emit_barrier (), asm_out_file, 1);
9188 /* Pad stack IP move with 4 instructions (two NOPs count
9189 as one instruction). */
9190 if (TARGET_PAD_SHORT_FUNCTION)
9192 int i = 8;
9194 while (i--)
9195 fputs ("\tnop\n", asm_out_file);
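/* The thunk body emitted below is a single load of the return address from
   the top of the stack into the target register followed by a return, e.g.
   for %ebx: movl (%esp), %ebx ; ret. */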
9198 xops[0] = gen_rtx_REG (Pmode, regno);
9199 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9200 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9201 fputs ("\tret\n", asm_out_file);
9202 final_end_function ();
9203 init_insn_lengths ();
9204 free_after_compilation (cfun);
9205 set_cfun (NULL);
9206 current_function_decl = NULL;
9209 if (flag_split_stack)
9210 file_end_indicate_split_stack ();
9213 /* Emit code for the SET_GOT patterns. */
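/* As a rough sketch, on 32-bit ELF with -fpic this typically expands to a
   call to the pc thunk followed by an add of the GOT base symbol, e.g.:
   call __x86.get_pc_thunk.bx
   addl $_GLOBAL_OFFSET_TABLE_, %ebx
   The exact sequence differs for VxWorks RTP, Mach-O and the non-PIC case
   handled below. */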
9215 const char *
9216 output_set_got (rtx dest, rtx label)
9218 rtx xops[3];
9220 xops[0] = dest;
9222 if (TARGET_VXWORKS_RTP && flag_pic)
9224 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9225 xops[2] = gen_rtx_MEM (Pmode,
9226 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9227 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9229 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9230 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9231 an unadorned address. */
9232 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9233 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9234 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9235 return "";
9238 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9240 if (!flag_pic)
9242 if (TARGET_MACHO)
9243 /* We don't need a pic base, we're not producing pic. */
9244 gcc_unreachable ();
9246 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9247 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9248 targetm.asm_out.internal_label (asm_out_file, "L",
9249 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9251 else
9253 char name[32];
9254 get_pc_thunk_name (name, REGNO (dest));
9255 pic_labels_used |= 1 << REGNO (dest);
9257 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9258 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9259 output_asm_insn ("call\t%X2", xops);
9261 #if TARGET_MACHO
9262 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9263 This is what will be referenced by the Mach-O PIC subsystem. */
9264 if (machopic_should_output_picbase_label () || !label)
9265 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9267 /* When we are restoring the pic base at the site of a nonlocal label,
9268 and we decided to emit the pic base above, we will still output a
9269 local label used for calculating the correction offset (even though
9270 the offset will be 0 in that case). */
9271 if (label)
9272 targetm.asm_out.internal_label (asm_out_file, "L",
9273 CODE_LABEL_NUMBER (label));
9274 #endif
9277 if (!TARGET_MACHO)
9278 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9280 return "";
9283 /* Generate a "push" pattern for input ARG. */
9285 static rtx
9286 gen_push (rtx arg)
9288 struct machine_function *m = cfun->machine;
9290 if (m->fs.cfa_reg == stack_pointer_rtx)
9291 m->fs.cfa_offset += UNITS_PER_WORD;
9292 m->fs.sp_offset += UNITS_PER_WORD;
9294 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9295 arg = gen_rtx_REG (word_mode, REGNO (arg));
9297 return gen_rtx_SET (VOIDmode,
9298 gen_rtx_MEM (word_mode,
9299 gen_rtx_PRE_DEC (Pmode,
9300 stack_pointer_rtx)),
9301 arg);
9304 /* Generate a "pop" pattern for input ARG. */
9306 static rtx
9307 gen_pop (rtx arg)
9309 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9310 arg = gen_rtx_REG (word_mode, REGNO (arg));
9312 return gen_rtx_SET (VOIDmode,
9313 arg,
9314 gen_rtx_MEM (word_mode,
9315 gen_rtx_POST_INC (Pmode,
9316 stack_pointer_rtx)));
9319 /* Return >= 0 if there is an unused call-clobbered register available
9320 for the entire function. */
9322 static unsigned int
9323 ix86_select_alt_pic_regnum (void)
9325 if (crtl->is_leaf
9326 && !crtl->profile
9327 && !ix86_current_function_calls_tls_descriptor)
9329 int i, drap;
9330 /* Can't use the same register for both PIC and DRAP. */
9331 if (crtl->drap_reg)
9332 drap = REGNO (crtl->drap_reg);
9333 else
9334 drap = -1;
9335 for (i = 2; i >= 0; --i)
9336 if (i != drap && !df_regs_ever_live_p (i))
9337 return i;
9340 return INVALID_REGNUM;
9343 /* Return TRUE if we need to save REGNO. */
9345 static bool
9346 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9348 if (pic_offset_table_rtx
9349 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9350 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9351 || crtl->profile
9352 || crtl->calls_eh_return
9353 || crtl->uses_const_pool
9354 || cfun->has_nonlocal_label))
9355 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9357 if (crtl->calls_eh_return && maybe_eh_return)
9359 unsigned i;
9360 for (i = 0; ; i++)
9362 unsigned test = EH_RETURN_DATA_REGNO (i);
9363 if (test == INVALID_REGNUM)
9364 break;
9365 if (test == regno)
9366 return true;
9370 if (crtl->drap_reg
9371 && regno == REGNO (crtl->drap_reg)
9372 && !cfun->machine->no_drap_save_restore)
9373 return true;
9375 return (df_regs_ever_live_p (regno)
9376 && !call_used_regs[regno]
9377 && !fixed_regs[regno]
9378 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9381 /* Return the number of saved general purpose registers. */
9383 static int
9384 ix86_nsaved_regs (void)
9386 int nregs = 0;
9387 int regno;
9389 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9390 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9391 nregs ++;
9392 return nregs;
9395 /* Return the number of saved SSE registers. */
9397 static int
9398 ix86_nsaved_sseregs (void)
9400 int nregs = 0;
9401 int regno;
9403 if (!TARGET_64BIT_MS_ABI)
9404 return 0;
9405 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9406 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9407 nregs ++;
9408 return nregs;
9411 /* Given FROM and TO register numbers, say whether this elimination is
9412 allowed. If stack alignment is needed, we can only replace argument
9413 pointer with hard frame pointer, or replace frame pointer with stack
9414 pointer. Otherwise, frame pointer elimination is automatically
9415 handled and all other eliminations are valid. */
9417 static bool
9418 ix86_can_eliminate (const int from, const int to)
9420 if (stack_realign_fp)
9421 return ((from == ARG_POINTER_REGNUM
9422 && to == HARD_FRAME_POINTER_REGNUM)
9423 || (from == FRAME_POINTER_REGNUM
9424 && to == STACK_POINTER_REGNUM));
9425 else
9426 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9429 /* Return the offset between two registers, one to be eliminated, and the other
9430 its replacement, at the start of a routine. */
9432 HOST_WIDE_INT
9433 ix86_initial_elimination_offset (int from, int to)
9435 struct ix86_frame frame;
9436 ix86_compute_frame_layout (&frame);
9438 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9439 return frame.hard_frame_pointer_offset;
9440 else if (from == FRAME_POINTER_REGNUM
9441 && to == HARD_FRAME_POINTER_REGNUM)
9442 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9443 else
9445 gcc_assert (to == STACK_POINTER_REGNUM);
9447 if (from == ARG_POINTER_REGNUM)
9448 return frame.stack_pointer_offset;
9450 gcc_assert (from == FRAME_POINTER_REGNUM);
9451 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9455 /* In a dynamically-aligned function, we can't know the offset from
9456 stack pointer to frame pointer, so we must ensure that setjmp
9457 eliminates fp against the hard fp (%ebp) rather than trying to
9458 index from %esp up to the top of the frame across a gap that is
9459 of unknown (at compile-time) size. */
9460 static rtx
9461 ix86_builtin_setjmp_frame_value (void)
9463 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9466 /* When using -fsplit-stack, the allocation routines set a field in
9467 the TCB to the bottom of the stack plus this much space, measured
9468 in bytes. */
9470 #define SPLIT_STACK_AVAILABLE 256
9472 /* Fill the ix86_frame structure with information about the frame of the currently compiled function. */
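/* A rough picture of the layout computed below, from the return address
   downwards: return address; optional pushed static chain; optional saved
   frame pointer; general purpose register save area; 16-byte aligned SSE
   register save area; va_arg save area; aligned local variables; outgoing
   argument area; with the red zone, when usable, sitting below the final
   stack pointer. */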
9474 static void
9475 ix86_compute_frame_layout (struct ix86_frame *frame)
9477 unsigned HOST_WIDE_INT stack_alignment_needed;
9478 HOST_WIDE_INT offset;
9479 unsigned HOST_WIDE_INT preferred_alignment;
9480 HOST_WIDE_INT size = get_frame_size ();
9481 HOST_WIDE_INT to_allocate;
9483 frame->nregs = ix86_nsaved_regs ();
9484 frame->nsseregs = ix86_nsaved_sseregs ();
9486 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9487 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9489 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9490 except in function prologues and in leaf functions. */
9491 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9492 && (!crtl->is_leaf || cfun->calls_alloca != 0
9493 || ix86_current_function_calls_tls_descriptor))
9495 preferred_alignment = 16;
9496 stack_alignment_needed = 16;
9497 crtl->preferred_stack_boundary = 128;
9498 crtl->stack_alignment_needed = 128;
9501 gcc_assert (!size || stack_alignment_needed);
9502 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9503 gcc_assert (preferred_alignment <= stack_alignment_needed);
9505 /* For SEH we have to limit the amount of code movement into the prologue.
9506 At present we do this via a BLOCKAGE, at which point there's very little
9507 scheduling that can be done, which means that there's very little point
9508 in doing anything except PUSHs. */
9509 if (TARGET_SEH)
9510 cfun->machine->use_fast_prologue_epilogue = false;
9512 /* During reload iteration the number of registers saved can change.
9513 Recompute the value as needed. Do not recompute when the number of registers
9514 didn't change, as reload calls this function multiple times and does not
9515 expect the decision to change within a single iteration. */
9516 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9517 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9519 int count = frame->nregs;
9520 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9522 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9524 /* The fast prologue uses move instead of push to save registers. This
9525 is significantly longer, but also executes faster, as modern hardware
9526 can execute the moves in parallel but cannot do so for push/pop.
9528 Be careful about choosing which prologue to emit: when the function takes
9529 many instructions to execute we may use the slow version, as well as when
9530 the function is known to be outside a hot spot (this is known with
9531 feedback only). Weight the size of the function by the number of registers
9532 to save, as it is cheap to use one or two push instructions but very
9533 slow to use many of them. */
9534 if (count)
9535 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9536 if (node->frequency < NODE_FREQUENCY_NORMAL
9537 || (flag_branch_probabilities
9538 && node->frequency < NODE_FREQUENCY_HOT))
9539 cfun->machine->use_fast_prologue_epilogue = false;
9540 else
9541 cfun->machine->use_fast_prologue_epilogue
9542 = !expensive_function_p (count);
9545 frame->save_regs_using_mov
9546 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9547 /* If static stack checking is enabled and done with probes,
9548 the registers need to be saved before allocating the frame. */
9549 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9551 /* Skip return address. */
9552 offset = UNITS_PER_WORD;
9554 /* Skip pushed static chain. */
9555 if (ix86_static_chain_on_stack)
9556 offset += UNITS_PER_WORD;
9558 /* Skip saved base pointer. */
9559 if (frame_pointer_needed)
9560 offset += UNITS_PER_WORD;
9561 frame->hfp_save_offset = offset;
9563 /* The traditional frame pointer location is at the top of the frame. */
9564 frame->hard_frame_pointer_offset = offset;
9566 /* Register save area */
9567 offset += frame->nregs * UNITS_PER_WORD;
9568 frame->reg_save_offset = offset;
9570 /* On SEH target, registers are pushed just before the frame pointer
9571 location. */
9572 if (TARGET_SEH)
9573 frame->hard_frame_pointer_offset = offset;
9575 /* Align and set SSE register save area. */
9576 if (frame->nsseregs)
9578 /* The only ABI that has saved SSE registers (Win64) also has a
9579 16-byte aligned default stack, and thus we don't need to be
9580 within the re-aligned local stack frame to save them. */
9581 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9582 offset = (offset + 16 - 1) & -16;
9583 offset += frame->nsseregs * 16;
9585 frame->sse_reg_save_offset = offset;
9587 /* The re-aligned stack starts here. Values before this point are not
9588 directly comparable with values below this point. In order to make
9589 sure that no value happens to be the same before and after, force
9590 the alignment computation below to add a non-zero value. */
9591 if (stack_realign_fp)
9592 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9594 /* Va-arg area */
9595 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9596 offset += frame->va_arg_size;
9598 /* Align start of frame for local function. */
9599 if (stack_realign_fp
9600 || offset != frame->sse_reg_save_offset
9601 || size != 0
9602 || !crtl->is_leaf
9603 || cfun->calls_alloca
9604 || ix86_current_function_calls_tls_descriptor)
9605 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9607 /* Frame pointer points here. */
9608 frame->frame_pointer_offset = offset;
9610 offset += size;
9612 /* Add the outgoing arguments area. It can be skipped if we eliminated
9613 all the function calls as dead code.
9614 Skipping is however impossible when the function calls alloca, as the
9615 alloca expander assumes that the last crtl->outgoing_args_size bytes
9616 of the stack frame are unused. */
9617 if (ACCUMULATE_OUTGOING_ARGS
9618 && (!crtl->is_leaf || cfun->calls_alloca
9619 || ix86_current_function_calls_tls_descriptor))
9621 offset += crtl->outgoing_args_size;
9622 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9624 else
9625 frame->outgoing_arguments_size = 0;
9627 /* Align stack boundary. Only needed if we're calling another function
9628 or using alloca. */
9629 if (!crtl->is_leaf || cfun->calls_alloca
9630 || ix86_current_function_calls_tls_descriptor)
9631 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9633 /* We've reached end of stack frame. */
9634 frame->stack_pointer_offset = offset;
9636 /* Size prologue needs to allocate. */
9637 to_allocate = offset - frame->sse_reg_save_offset;
9639 if ((!to_allocate && frame->nregs <= 1)
9640 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9641 frame->save_regs_using_mov = false;
9643 if (ix86_using_red_zone ()
9644 && crtl->sp_is_unchanging
9645 && crtl->is_leaf
9646 && !ix86_current_function_calls_tls_descriptor)
9648 frame->red_zone_size = to_allocate;
9649 if (frame->save_regs_using_mov)
9650 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9651 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9652 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9654 else
9655 frame->red_zone_size = 0;
9656 frame->stack_pointer_offset -= frame->red_zone_size;
9658 /* The SEH frame pointer location is near the bottom of the frame.
9659 This is enforced by the fact that the difference between the
9660 stack pointer and the frame pointer is limited to 240 bytes in
9661 the unwind data structure. */
9662 if (TARGET_SEH)
9664 HOST_WIDE_INT diff;
9666 /* If we can leave the frame pointer where it is, do so; this also returns
9667 the establisher frame for __builtin_frame_address (0). */
9668 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9669 if (diff <= SEH_MAX_FRAME_SIZE
9670 && (diff > 240 || (diff & 15) != 0)
9671 && !crtl->accesses_prior_frames)
9673 /* Ideally we'd determine what portion of the local stack frame
9674 (within the constraint of the lowest 240) is most heavily used.
9675 But without that complication, simply bias the frame pointer
9676 by 128 bytes so as to maximize the amount of the local stack
9677 frame that is addressable with 8-bit offsets. */
9678 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9683 /* This is semi-inlined memory_address_length, but simplified
9684 since we know that we're always dealing with reg+offset, and
9685 to avoid having to create and discard all that rtl. */
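/* Worked examples: (BP_REG, 0) -> 1 since 0(%ebp) still needs a disp8;
   (SP_REG, 0) -> 1 for the SIB byte; (SP_REG, 200) -> 4 + 1 = 5 since the
   offset needs a disp32 and %esp needs a SIB byte. */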
9687 static inline int
9688 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9690 int len = 4;
9692 if (offset == 0)
9694 /* EBP and R13 cannot be encoded without an offset. */
9695 len = (regno == BP_REG || regno == R13_REG);
9697 else if (IN_RANGE (offset, -128, 127))
9698 len = 1;
9700 /* ESP and R12 must be encoded with a SIB byte. */
9701 if (regno == SP_REG || regno == R12_REG)
9702 len++;
9704 return len;
9707 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9708 The valid base registers are taken from CFUN->MACHINE->FS. */
9710 static rtx
9711 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9713 const struct machine_function *m = cfun->machine;
9714 rtx base_reg = NULL;
9715 HOST_WIDE_INT base_offset = 0;
9717 if (m->use_fast_prologue_epilogue)
9719 /* Choose the base register most likely to allow the most scheduling
9720 opportunities. Generally FP is valid throughout the function,
9721 while DRAP must be reloaded within the epilogue. But choose either
9722 over the SP due to increased encoding size. */
9724 if (m->fs.fp_valid)
9726 base_reg = hard_frame_pointer_rtx;
9727 base_offset = m->fs.fp_offset - cfa_offset;
9729 else if (m->fs.drap_valid)
9731 base_reg = crtl->drap_reg;
9732 base_offset = 0 - cfa_offset;
9734 else if (m->fs.sp_valid)
9736 base_reg = stack_pointer_rtx;
9737 base_offset = m->fs.sp_offset - cfa_offset;
9740 else
9742 HOST_WIDE_INT toffset;
9743 int len = 16, tlen;
9745 /* Choose the base register with the smallest address encoding.
9746 With a tie, choose FP > DRAP > SP. */
9747 if (m->fs.sp_valid)
9749 base_reg = stack_pointer_rtx;
9750 base_offset = m->fs.sp_offset - cfa_offset;
9751 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9753 if (m->fs.drap_valid)
9755 toffset = 0 - cfa_offset;
9756 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9757 if (tlen <= len)
9759 base_reg = crtl->drap_reg;
9760 base_offset = toffset;
9761 len = tlen;
9764 if (m->fs.fp_valid)
9766 toffset = m->fs.fp_offset - cfa_offset;
9767 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9768 if (tlen <= len)
9770 base_reg = hard_frame_pointer_rtx;
9771 base_offset = toffset;
9772 len = tlen;
9776 gcc_assert (base_reg != NULL);
9778 return plus_constant (Pmode, base_reg, base_offset);
9781 /* Emit code to save registers in the prologue. */
9783 static void
9784 ix86_emit_save_regs (void)
9786 unsigned int regno;
9787 rtx insn;
9789 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9790 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9792 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9793 RTX_FRAME_RELATED_P (insn) = 1;
9797 /* Emit a single register save at CFA - CFA_OFFSET. */
9799 static void
9800 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9801 HOST_WIDE_INT cfa_offset)
9803 struct machine_function *m = cfun->machine;
9804 rtx reg = gen_rtx_REG (mode, regno);
9805 rtx mem, addr, base, insn;
9807 addr = choose_baseaddr (cfa_offset);
9808 mem = gen_frame_mem (mode, addr);
9810 /* For SSE saves, we need to indicate the 128-bit alignment. */
9811 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9813 insn = emit_move_insn (mem, reg);
9814 RTX_FRAME_RELATED_P (insn) = 1;
9816 base = addr;
9817 if (GET_CODE (base) == PLUS)
9818 base = XEXP (base, 0);
9819 gcc_checking_assert (REG_P (base));
9821 /* When saving registers into a re-aligned local stack frame, avoid
9822 any tricky guessing by dwarf2out. */
9823 if (m->fs.realigned)
9825 gcc_checking_assert (stack_realign_drap);
9827 if (regno == REGNO (crtl->drap_reg))
9829 /* A bit of a hack. We force the DRAP register to be saved in
9830 the re-aligned stack frame, which provides us with a copy
9831 of the CFA that will last past the prologue. Install it. */
9832 gcc_checking_assert (cfun->machine->fs.fp_valid);
9833 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9834 cfun->machine->fs.fp_offset - cfa_offset);
9835 mem = gen_rtx_MEM (mode, addr);
9836 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9838 else
9840 /* The frame pointer is a stable reference within the
9841 aligned frame. Use it. */
9842 gcc_checking_assert (cfun->machine->fs.fp_valid);
9843 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9844 cfun->machine->fs.fp_offset - cfa_offset);
9845 mem = gen_rtx_MEM (mode, addr);
9846 add_reg_note (insn, REG_CFA_EXPRESSION,
9847 gen_rtx_SET (VOIDmode, mem, reg));
9851 /* The memory may not be relative to the current CFA register,
9852 which means that we may need to generate a new pattern for
9853 use by the unwind info. */
9854 else if (base != m->fs.cfa_reg)
9856 addr = plus_constant (Pmode, m->fs.cfa_reg,
9857 m->fs.cfa_offset - cfa_offset);
9858 mem = gen_rtx_MEM (mode, addr);
9859 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9863 /* Emit code to save registers using MOV insns.
9864 First register is stored at CFA - CFA_OFFSET. */
9865 static void
9866 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9868 unsigned int regno;
9870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9871 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9873 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9874 cfa_offset -= UNITS_PER_WORD;
9878 /* Emit code to save SSE registers using MOV insns.
9879 First register is stored at CFA - CFA_OFFSET. */
9880 static void
9881 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9883 unsigned int regno;
9885 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9886 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9888 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9889 cfa_offset -= 16;
9893 static GTY(()) rtx queued_cfa_restores;
9895 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9896 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9897 Don't add the note if the previously saved value will be left untouched
9898 within stack red-zone till return, as unwinders can find the same value
9899 in the register and on the stack. */
9901 static void
9902 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9904 if (!crtl->shrink_wrapped
9905 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9906 return;
9908 if (insn)
9910 add_reg_note (insn, REG_CFA_RESTORE, reg);
9911 RTX_FRAME_RELATED_P (insn) = 1;
9913 else
9914 queued_cfa_restores
9915 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9918 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9920 static void
9921 ix86_add_queued_cfa_restore_notes (rtx insn)
9923 rtx last;
9924 if (!queued_cfa_restores)
9925 return;
9926 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9928 XEXP (last, 1) = REG_NOTES (insn);
9929 REG_NOTES (insn) = queued_cfa_restores;
9930 queued_cfa_restores = NULL_RTX;
9931 RTX_FRAME_RELATED_P (insn) = 1;
9934 /* Expand a prologue or epilogue stack adjustment.
9935 The pattern exists to put a dependency on all ebp-based memory accesses.
9936 STYLE should be negative if instructions should be marked as frame related,
9937 zero if the %r11 register is live and cannot be freely used, and positive
9938 otherwise. */
9940 static void
9941 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9942 int style, bool set_cfa)
9944 struct machine_function *m = cfun->machine;
9945 rtx insn;
9946 bool add_frame_related_expr = false;
9948 if (Pmode == SImode)
9949 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9950 else if (x86_64_immediate_operand (offset, DImode))
9951 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9952 else
9954 rtx tmp;
9955 /* r11 is used by indirect sibcall return as well, set before the
9956 epilogue and used after the epilogue. */
9957 if (style)
9958 tmp = gen_rtx_REG (DImode, R11_REG);
9959 else
9961 gcc_assert (src != hard_frame_pointer_rtx
9962 && dest != hard_frame_pointer_rtx);
9963 tmp = hard_frame_pointer_rtx;
9965 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9966 if (style < 0)
9967 add_frame_related_expr = true;
9969 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9972 insn = emit_insn (insn);
9973 if (style >= 0)
9974 ix86_add_queued_cfa_restore_notes (insn);
9976 if (set_cfa)
9978 rtx r;
9980 gcc_assert (m->fs.cfa_reg == src);
9981 m->fs.cfa_offset += INTVAL (offset);
9982 m->fs.cfa_reg = dest;
9984 r = gen_rtx_PLUS (Pmode, src, offset);
9985 r = gen_rtx_SET (VOIDmode, dest, r);
9986 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9987 RTX_FRAME_RELATED_P (insn) = 1;
9989 else if (style < 0)
9991 RTX_FRAME_RELATED_P (insn) = 1;
9992 if (add_frame_related_expr)
9994 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9995 r = gen_rtx_SET (VOIDmode, dest, r);
9996 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10000 if (dest == stack_pointer_rtx)
10002 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10003 bool valid = m->fs.sp_valid;
10005 if (src == hard_frame_pointer_rtx)
10007 valid = m->fs.fp_valid;
10008 ooffset = m->fs.fp_offset;
10010 else if (src == crtl->drap_reg)
10012 valid = m->fs.drap_valid;
10013 ooffset = 0;
10015 else
10017 /* Else there are two possibilities: SP itself, which we set
10018 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10019 taken care of by hand along the eh_return path. */
10020 gcc_checking_assert (src == stack_pointer_rtx
10021 || offset == const0_rtx);
10024 m->fs.sp_offset = ooffset - INTVAL (offset);
10025 m->fs.sp_valid = valid;
10029 /* Find an available register to be used as the dynamic realign argument
10030 pointer register. Such a register will be written in the prologue and
10031 used at the beginning of the body, so it must not be
10032 1. a parameter passing register.
10033 2. the GOT pointer.
10034 We reuse the static-chain register if it is available. Otherwise, we
10035 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10036 shorter encoding.
10038 Return: the regno of the chosen register. */
10040 static unsigned int
10041 find_drap_reg (void)
10043 tree decl = cfun->decl;
10045 if (TARGET_64BIT)
10047 /* Use R13 for a nested function or a function that needs a static chain.
10048 Since a function with a tail call may use any caller-saved
10049 register in the epilogue, DRAP must not use a caller-saved
10050 register in such a case. */
10051 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10052 return R13_REG;
10054 return R10_REG;
10056 else
10058 /* Use DI for a nested function or a function that needs a static chain.
10059 Since a function with a tail call may use any caller-saved
10060 register in the epilogue, DRAP must not use a caller-saved
10061 register in such a case. */
10062 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10063 return DI_REG;
10065 /* Reuse the static chain register if it isn't used for parameter
10066 passing. */
10067 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10069 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10070 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10071 return CX_REG;
10073 return DI_REG;
10077 /* Return minimum incoming stack alignment. */
10079 static unsigned int
10080 ix86_minimum_incoming_stack_boundary (bool sibcall)
10082 unsigned int incoming_stack_boundary;
10084 /* Prefer the one specified at command line. */
10085 if (ix86_user_incoming_stack_boundary)
10086 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10087 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10088 if -mstackrealign is used, this isn't a sibcall check, and the
10089 estimated stack alignment is 128 bits. */
10090 else if (!sibcall
10091 && !TARGET_64BIT
10092 && ix86_force_align_arg_pointer
10093 && crtl->stack_alignment_estimated == 128)
10094 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10095 else
10096 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10098 /* Incoming stack alignment can be changed on individual functions
10099 via force_align_arg_pointer attribute. We use the smallest
10100 incoming stack boundary. */
10101 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10102 && lookup_attribute (ix86_force_align_arg_pointer_string,
10103 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10104 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10106 /* The incoming stack frame has to be aligned at least at
10107 parm_stack_boundary. */
10108 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10109 incoming_stack_boundary = crtl->parm_stack_boundary;
10111 /* The stack at the entrance of main is aligned by the runtime. We use the
10112 smallest incoming stack boundary. */
10113 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10114 && DECL_NAME (current_function_decl)
10115 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10116 && DECL_FILE_SCOPE_P (current_function_decl))
10117 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10119 return incoming_stack_boundary;
10122 /* Update incoming stack boundary and estimated stack alignment. */
10124 static void
10125 ix86_update_stack_boundary (void)
10127 ix86_incoming_stack_boundary
10128 = ix86_minimum_incoming_stack_boundary (false);
10130 /* x86_64 varargs need 16-byte stack alignment for the register save
10131 area. */
10132 if (TARGET_64BIT
10133 && cfun->stdarg
10134 && crtl->stack_alignment_estimated < 128)
10135 crtl->stack_alignment_estimated = 128;
10138 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10139 needed or an rtx for DRAP otherwise. */
10141 static rtx
10142 ix86_get_drap_rtx (void)
10144 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10145 crtl->need_drap = true;
10147 if (stack_realign_drap)
10149 /* Assign DRAP to vDRAP and return vDRAP. */
10150 unsigned int regno = find_drap_reg ();
10151 rtx drap_vreg;
10152 rtx arg_ptr;
10153 rtx seq, insn;
10155 arg_ptr = gen_rtx_REG (Pmode, regno);
10156 crtl->drap_reg = arg_ptr;
10158 start_sequence ();
10159 drap_vreg = copy_to_reg (arg_ptr);
10160 seq = get_insns ();
10161 end_sequence ();
10163 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10164 if (!optimize)
10166 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10167 RTX_FRAME_RELATED_P (insn) = 1;
10169 return drap_vreg;
10171 else
10172 return NULL;
10175 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10177 static rtx
10178 ix86_internal_arg_pointer (void)
10180 return virtual_incoming_args_rtx;
10183 struct scratch_reg {
10184 rtx reg;
10185 bool saved;
10188 /* Return a short-lived scratch register for use on function entry.
10189 In 32-bit mode, it is valid only after the registers are saved
10190 in the prologue. This register must be released by means of
10191 release_scratch_register_on_entry once it is dead. */
10193 static void
10194 get_scratch_register_on_entry (struct scratch_reg *sr)
10196 int regno;
10198 sr->saved = false;
10200 if (TARGET_64BIT)
10202 /* We always use R11 in 64-bit mode. */
10203 regno = R11_REG;
10205 else
10207 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10208 bool fastcall_p
10209 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10210 bool thiscall_p
10211 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10212 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10213 int regparm = ix86_function_regparm (fntype, decl);
10214 int drap_regno
10215 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10217 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10218 for the static chain register. */
10219 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10220 && drap_regno != AX_REG)
10221 regno = AX_REG;
10222 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10223 for the static chain register. */
10224 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10225 regno = AX_REG;
10226 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10227 regno = DX_REG;
10228 /* ecx is the static chain register. */
10229 else if (regparm < 3 && !fastcall_p && !thiscall_p
10230 && !static_chain_p
10231 && drap_regno != CX_REG)
10232 regno = CX_REG;
10233 else if (ix86_save_reg (BX_REG, true))
10234 regno = BX_REG;
10235 /* esi is the static chain register. */
10236 else if (!(regparm == 3 && static_chain_p)
10237 && ix86_save_reg (SI_REG, true))
10238 regno = SI_REG;
10239 else if (ix86_save_reg (DI_REG, true))
10240 regno = DI_REG;
10241 else
10243 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10244 sr->saved = true;
10248 sr->reg = gen_rtx_REG (Pmode, regno);
10249 if (sr->saved)
10251 rtx insn = emit_insn (gen_push (sr->reg));
10252 RTX_FRAME_RELATED_P (insn) = 1;
10256 /* Release a scratch register obtained from the preceding function. */
10258 static void
10259 release_scratch_register_on_entry (struct scratch_reg *sr)
10261 if (sr->saved)
10263 struct machine_function *m = cfun->machine;
10264 rtx x, insn = emit_insn (gen_pop (sr->reg));
10266 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10267 RTX_FRAME_RELATED_P (insn) = 1;
10268 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10269 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10270 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10271 m->fs.sp_offset -= UNITS_PER_WORD;
10275 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
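/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 this is a
   4096-byte (one page) probe interval, though targets may override it. */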
10277 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10279 static void
10280 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10282 /* We skip the probe for the first interval + a small dope of 4 words and
10283 probe that many bytes past the specified size to maintain a protection
10284 area at the bottom of the stack. */
10285 const int dope = 4 * UNITS_PER_WORD;
10286 rtx size_rtx = GEN_INT (size), last;
10288 /* See if we have a constant small number of probes to generate. If so,
10289 that's the easy case. The run-time loop is made up of 11 insns in the
10290 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10291 for n # of intervals. */
10292 if (size <= 5 * PROBE_INTERVAL)
10294 HOST_WIDE_INT i, adjust;
10295 bool first_probe = true;
10297 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10298 values of N from 1 until it exceeds SIZE. If only one probe is
10299 needed, this will not generate any code. Then adjust and probe
10300 to PROBE_INTERVAL + SIZE. */
10301 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10303 if (first_probe)
10305 adjust = 2 * PROBE_INTERVAL + dope;
10306 first_probe = false;
10308 else
10309 adjust = PROBE_INTERVAL;
10311 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10312 plus_constant (Pmode, stack_pointer_rtx,
10313 -adjust)));
10314 emit_stack_probe (stack_pointer_rtx);
10317 if (first_probe)
10318 adjust = size + PROBE_INTERVAL + dope;
10319 else
10320 adjust = size + PROBE_INTERVAL - i;
10322 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10323 plus_constant (Pmode, stack_pointer_rtx,
10324 -adjust)));
10325 emit_stack_probe (stack_pointer_rtx);
10327 /* Adjust back to account for the additional first interval. */
10328 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10329 plus_constant (Pmode, stack_pointer_rtx,
10330 PROBE_INTERVAL + dope)));
10333 /* Otherwise, do the same as above, but in a loop. Note that we must be
10334 extra careful with variables wrapping around because we might be at
10335 the very top (or the very bottom) of the address space and we have
10336 to be able to handle this case properly; in particular, we use an
10337 equality test for the loop condition. */
10338 else
10340 HOST_WIDE_INT rounded_size;
10341 struct scratch_reg sr;
10343 get_scratch_register_on_entry (&sr);
10346 /* Step 1: round SIZE to the previous multiple of the interval. */
10348 rounded_size = size & -PROBE_INTERVAL;
10351 /* Step 2: compute initial and final value of the loop counter. */
10353 /* SP = SP_0 + PROBE_INTERVAL. */
10354 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10355 plus_constant (Pmode, stack_pointer_rtx,
10356 - (PROBE_INTERVAL + dope))));
10358 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10359 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10360 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10361 gen_rtx_PLUS (Pmode, sr.reg,
10362 stack_pointer_rtx)));
10365 /* Step 3: the loop
10367 while (SP != LAST_ADDR)
10369 SP = SP + PROBE_INTERVAL
10370 probe at SP
10373 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10374 values of N from 1 until it is equal to ROUNDED_SIZE. */
10376 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10379 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10380 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10382 if (size != rounded_size)
10384 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10385 plus_constant (Pmode, stack_pointer_rtx,
10386 rounded_size - size)));
10387 emit_stack_probe (stack_pointer_rtx);
10390 /* Adjust back to account for the additional first interval. */
10391 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10392 plus_constant (Pmode, stack_pointer_rtx,
10393 PROBE_INTERVAL + dope)));
10395 release_scratch_register_on_entry (&sr);
10398 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10400 /* Even if the stack pointer isn't the CFA register, we need to correctly
10401 describe the adjustments made to it, in particular differentiate the
10402 frame-related ones from the frame-unrelated ones. */
10403 if (size > 0)
10405 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10406 XVECEXP (expr, 0, 0)
10407 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10408 plus_constant (Pmode, stack_pointer_rtx, -size));
10409 XVECEXP (expr, 0, 1)
10410 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10411 plus_constant (Pmode, stack_pointer_rtx,
10412 PROBE_INTERVAL + dope + size));
10413 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10414 RTX_FRAME_RELATED_P (last) = 1;
10416 cfun->machine->fs.sp_offset += size;
10419 /* Make sure nothing is scheduled before we are done. */
10420 emit_insn (gen_blockage ());
10423 /* Adjust the stack pointer up to REG while probing it. */
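/* Roughly, the loop emitted below looks like this (AT&T syntax, 32-bit):
   loop: cmpl %reg, %esp
         je end
         subl $PROBE_INTERVAL, %esp
         orl $0, (%esp)
         jmp loop
   end: */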
10425 const char *
10426 output_adjust_stack_and_probe (rtx reg)
10428 static int labelno = 0;
10429 char loop_lab[32], end_lab[32];
10430 rtx xops[2];
10432 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10433 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10435 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10437 /* Jump to END_LAB if SP == LAST_ADDR. */
10438 xops[0] = stack_pointer_rtx;
10439 xops[1] = reg;
10440 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10441 fputs ("\tje\t", asm_out_file);
10442 assemble_name_raw (asm_out_file, end_lab);
10443 fputc ('\n', asm_out_file);
10445 /* SP = SP + PROBE_INTERVAL. */
10446 xops[1] = GEN_INT (PROBE_INTERVAL);
10447 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10449 /* Probe at SP. */
10450 xops[1] = const0_rtx;
10451 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10453 fprintf (asm_out_file, "\tjmp\t");
10454 assemble_name_raw (asm_out_file, loop_lab);
10455 fputc ('\n', asm_out_file);
10457 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10459 return "";
10462 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10463 inclusive. These are offsets from the current stack pointer. */
10465 static void
10466 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10468 /* See if we have a constant small number of probes to generate. If so,
10469 that's the easy case. The run-time loop is made up of 7 insns in the
10470 generic case while the compile-time loop is made up of n insns for n #
10471 of intervals. */
10472 if (size <= 7 * PROBE_INTERVAL)
10474 HOST_WIDE_INT i;
10476 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10477 it exceeds SIZE. If only one probe is needed, this will not
10478 generate any code. Then probe at FIRST + SIZE. */
10479 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10480 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10481 -(first + i)));
10483 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10484 -(first + size)));
10487 /* Otherwise, do the same as above, but in a loop. Note that we must be
10488 extra careful with variables wrapping around because we might be at
10489 the very top (or the very bottom) of the address space and we have
10490 to be able to handle this case properly; in particular, we use an
10491 equality test for the loop condition. */
10492 else
10494 HOST_WIDE_INT rounded_size, last;
10495 struct scratch_reg sr;
10497 get_scratch_register_on_entry (&sr);
10500 /* Step 1: round SIZE to the previous multiple of the interval. */
10502 rounded_size = size & -PROBE_INTERVAL;
10505 /* Step 2: compute initial and final value of the loop counter. */
10507 /* TEST_OFFSET = FIRST. */
10508 emit_move_insn (sr.reg, GEN_INT (-first));
10510 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10511 last = first + rounded_size;
10514 /* Step 3: the loop
10516 while (TEST_ADDR != LAST_ADDR)
10518 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10519 probe at TEST_ADDR
10522 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10523 until it is equal to ROUNDED_SIZE. */
10525 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10528 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10529 that SIZE is equal to ROUNDED_SIZE. */
10531 if (size != rounded_size)
10532 emit_stack_probe (plus_constant (Pmode,
10533 gen_rtx_PLUS (Pmode,
10534 stack_pointer_rtx,
10535 sr.reg),
10536 rounded_size - size));
10538 release_scratch_register_on_entry (&sr);
10541 /* Make sure nothing is scheduled before we are done. */
10542 emit_insn (gen_blockage ());
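/* Worked example, not part of the compiler source (assuming the usual
   4096-byte PROBE_INTERVAL): for first == 0 and size == 10000 the
   unrolled path above emits probes at sp - 4096, sp - 8192 and finally
   sp - 10000; a size above 7 * 4096 bytes would instead go through the
   run-time loop using the scratch register.  */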
10545 /* Probe a range of stack addresses from REG to END, inclusive. These are
10546 offsets from the current stack pointer. */
10548 const char *
10549 output_probe_stack_range (rtx reg, rtx end)
10551 static int labelno = 0;
10552 char loop_lab[32], end_lab[32];
10553 rtx xops[3];
10555 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10556 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10558 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10560 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10561 xops[0] = reg;
10562 xops[1] = end;
10563 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10564 fputs ("\tje\t", asm_out_file);
10565 assemble_name_raw (asm_out_file, end_lab);
10566 fputc ('\n', asm_out_file);
10568 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10569 xops[1] = GEN_INT (PROBE_INTERVAL);
10570 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10572 /* Probe at TEST_ADDR. */
10573 xops[0] = stack_pointer_rtx;
10574 xops[1] = reg;
10575 xops[2] = const0_rtx;
10576 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10578 fprintf (asm_out_file, "\tjmp\t");
10579 assemble_name_raw (asm_out_file, loop_lab);
10580 fputc ('\n', asm_out_file);
10582 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10584 return "";
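/* Illustrative sketch, not part of the compiler source: with %r11 as a
   hypothetical scratch register holding the (negative) probe offset and
   -8192 as the (negative) final offset, the template above emits roughly
   this AT&T-syntax loop on a 64-bit target:

       .LPSRL1:
               cmpq    $-8192, %r11
               je      .LPSRE1
               subq    $4096, %r11
               orq     $0, (%rsp,%r11)
               jmp     .LPSRL1
       .LPSRE1:

   Each iteration probes one PROBE_INTERVAL further below the stack
   pointer without moving the stack pointer itself.  */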
10587 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10588 to be generated in correct form. */
10589 static void
10590 ix86_finalize_stack_realign_flags (void)
10592 /* Check if stack realign is really needed after reload, and
10593 store the result in cfun. */
10594 unsigned int incoming_stack_boundary
10595 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10596 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10597 unsigned int stack_realign = (incoming_stack_boundary
10598 < (crtl->is_leaf
10599 ? crtl->max_used_stack_slot_alignment
10600 : crtl->stack_alignment_needed));
10602 if (crtl->stack_realign_finalized)
10604 /* After stack_realign_needed is finalized, we can no longer
10605 change it. */
10606 gcc_assert (crtl->stack_realign_needed == stack_realign);
10607 return;
10610 /* If the only reason for frame_pointer_needed is that we conservatively
10611 assumed stack realignment might be needed, but in the end nothing that
10612 needed the stack alignment had been spilled, clear frame_pointer_needed
10613 and say we don't need stack realignment. */
10614 if (stack_realign
10615 && frame_pointer_needed
10616 && crtl->is_leaf
10617 && flag_omit_frame_pointer
10618 && crtl->sp_is_unchanging
10619 && !ix86_current_function_calls_tls_descriptor
10620 && !crtl->accesses_prior_frames
10621 && !cfun->calls_alloca
10622 && !crtl->calls_eh_return
10623 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10624 && !ix86_frame_pointer_required ()
10625 && get_frame_size () == 0
10626 && ix86_nsaved_sseregs () == 0
10627 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10629 HARD_REG_SET set_up_by_prologue, prologue_used;
10630 basic_block bb;
10632 CLEAR_HARD_REG_SET (prologue_used);
10633 CLEAR_HARD_REG_SET (set_up_by_prologue);
10634 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10635 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10636 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10637 HARD_FRAME_POINTER_REGNUM);
10638 FOR_EACH_BB_FN (bb, cfun)
10640 rtx insn;
10641 FOR_BB_INSNS (bb, insn)
10642 if (NONDEBUG_INSN_P (insn)
10643 && requires_stack_frame_p (insn, prologue_used,
10644 set_up_by_prologue))
10646 crtl->stack_realign_needed = stack_realign;
10647 crtl->stack_realign_finalized = true;
10648 return;
10652 /* If drap has been set, but it actually isn't live at the start
10653 of the function, there is no reason to set it up. */
10654 if (crtl->drap_reg)
10656 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10657 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10659 crtl->drap_reg = NULL_RTX;
10660 crtl->need_drap = false;
10663 else
10664 cfun->machine->no_drap_save_restore = true;
10666 frame_pointer_needed = false;
10667 stack_realign = false;
10668 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10669 crtl->stack_alignment_needed = incoming_stack_boundary;
10670 crtl->stack_alignment_estimated = incoming_stack_boundary;
10671 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10672 crtl->preferred_stack_boundary = incoming_stack_boundary;
10673 df_finish_pass (true);
10674 df_scan_alloc (NULL);
10675 df_scan_blocks ();
10676 df_compute_regs_ever_live (true);
10677 df_analyze ();
10680 crtl->stack_realign_needed = stack_realign;
10681 crtl->stack_realign_finalized = true;
10684 /* Expand the prologue into a bunch of separate insns. */
10686 void
10687 ix86_expand_prologue (void)
10689 struct machine_function *m = cfun->machine;
10690 rtx insn, t;
10691 bool pic_reg_used;
10692 struct ix86_frame frame;
10693 HOST_WIDE_INT allocate;
10694 bool int_registers_saved;
10695 bool sse_registers_saved;
10697 ix86_finalize_stack_realign_flags ();
10699 /* DRAP should not coexist with stack_realign_fp */
10700 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10702 memset (&m->fs, 0, sizeof (m->fs));
10704 /* Initialize CFA state for before the prologue. */
10705 m->fs.cfa_reg = stack_pointer_rtx;
10706 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10708 /* Track SP offset to the CFA. We continue tracking this after we've
10709 swapped the CFA register away from SP. In the case of re-alignment
10710 this is fudged; we're interested in offsets within the local frame. */
10711 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10712 m->fs.sp_valid = true;
10714 ix86_compute_frame_layout (&frame);
10716 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10718 /* We should have already generated an error for any use of
10719 ms_hook on a nested function. */
10720 gcc_checking_assert (!ix86_static_chain_on_stack);
10722 /* Check if profiling is active and we shall use the profiling-before-
10723 prologue variant. If so, sorry. */
10724 if (crtl->profile && flag_fentry != 0)
10725 sorry ("ms_hook_prologue attribute isn%'t compatible "
10726 "with -mfentry for 32-bit");
10728 /* In ix86_asm_output_function_label we emitted:
10729 8b ff movl.s %edi,%edi
10730 55 push %ebp
10731 8b ec movl.s %esp,%ebp
10733 This matches the hookable function prologue in Win32 API
10734 functions in Microsoft Windows XP Service Pack 2 and newer.
10735 Wine uses this to enable Windows apps to hook the Win32 API
10736 functions provided by Wine.
10738 What that means is that we've already set up the frame pointer. */
10740 if (frame_pointer_needed
10741 && !(crtl->drap_reg && crtl->stack_realign_needed))
10743 rtx push, mov;
10745 /* We've decided to use the frame pointer already set up.
10746 Describe this to the unwinder by pretending that both
10747 push and mov insns happen right here.
10749 Putting the unwind info here at the end of the ms_hook
10750 is done so that we can make absolutely certain we get
10751 the required byte sequence at the start of the function,
10752 rather than relying on an assembler that can produce
10753 the exact encoding required.
10755 However it does mean (in the unpatched case) that we have
10756 a 1 insn window where the asynchronous unwind info is
10757 incorrect. However, if we placed the unwind info at
10758 its correct location we would have incorrect unwind info
10759 in the patched case. Which is probably all moot since
10760 I don't expect Wine to generate dwarf2 unwind info for the
10761 system libraries that use this feature. */
10763 insn = emit_insn (gen_blockage ());
10765 push = gen_push (hard_frame_pointer_rtx);
10766 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10767 stack_pointer_rtx);
10768 RTX_FRAME_RELATED_P (push) = 1;
10769 RTX_FRAME_RELATED_P (mov) = 1;
10771 RTX_FRAME_RELATED_P (insn) = 1;
10772 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10773 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10775 /* Note that gen_push incremented m->fs.cfa_offset, even
10776 though we didn't emit the push insn here. */
10777 m->fs.cfa_reg = hard_frame_pointer_rtx;
10778 m->fs.fp_offset = m->fs.cfa_offset;
10779 m->fs.fp_valid = true;
10781 else
10783 /* The frame pointer is not needed so pop %ebp again.
10784 This leaves us with a pristine state. */
10785 emit_insn (gen_pop (hard_frame_pointer_rtx));
10789 /* The first insn of a function that accepts its static chain on the
10790 stack is to push the register that would be filled in by a direct
10791 call. This insn will be skipped by the trampoline. */
10792 else if (ix86_static_chain_on_stack)
10794 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10795 emit_insn (gen_blockage ());
10797 /* We don't want to interpret this push insn as a register save,
10798 only as a stack adjustment. The real copy of the register as
10799 a save will be done later, if needed. */
10800 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10801 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10802 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10803 RTX_FRAME_RELATED_P (insn) = 1;
10806 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10807 DRAP is needed and stack realignment is really needed after reload. */
10808 if (stack_realign_drap)
10810 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10812 /* Only need to push parameter pointer reg if it is caller saved. */
10813 if (!call_used_regs[REGNO (crtl->drap_reg)])
10815 /* Push arg pointer reg */
10816 insn = emit_insn (gen_push (crtl->drap_reg));
10817 RTX_FRAME_RELATED_P (insn) = 1;
10820 /* Grab the argument pointer. */
10821 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10822 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10823 RTX_FRAME_RELATED_P (insn) = 1;
10824 m->fs.cfa_reg = crtl->drap_reg;
10825 m->fs.cfa_offset = 0;
10827 /* Align the stack. */
10828 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10829 stack_pointer_rtx,
10830 GEN_INT (-align_bytes)));
10831 RTX_FRAME_RELATED_P (insn) = 1;
10833 /* Replicate the return address on the stack so that return
10834 address can be reached via (argp - 1) slot. This is needed
10835 to implement macro RETURN_ADDR_RTX and intrinsic function
10836 expand_builtin_return_addr etc. */
10837 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10838 t = gen_frame_mem (word_mode, t);
10839 insn = emit_insn (gen_push (t));
10840 RTX_FRAME_RELATED_P (insn) = 1;
10842 /* For the purposes of frame and register save area addressing,
10843 we've started over with a new frame. */
10844 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10845 m->fs.realigned = true;
10848 int_registers_saved = (frame.nregs == 0);
10849 sse_registers_saved = (frame.nsseregs == 0);
10851 if (frame_pointer_needed && !m->fs.fp_valid)
10853 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10854 slower on all targets. Also sdb doesn't like it. */
10855 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10856 RTX_FRAME_RELATED_P (insn) = 1;
10858 /* Push registers now, before setting the frame pointer
10859 on SEH target. */
10860 if (!int_registers_saved
10861 && TARGET_SEH
10862 && !frame.save_regs_using_mov)
10864 ix86_emit_save_regs ();
10865 int_registers_saved = true;
10866 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10869 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10871 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10872 RTX_FRAME_RELATED_P (insn) = 1;
10874 if (m->fs.cfa_reg == stack_pointer_rtx)
10875 m->fs.cfa_reg = hard_frame_pointer_rtx;
10876 m->fs.fp_offset = m->fs.sp_offset;
10877 m->fs.fp_valid = true;
10881 if (!int_registers_saved)
10883 /* If saving registers via PUSH, do so now. */
10884 if (!frame.save_regs_using_mov)
10886 ix86_emit_save_regs ();
10887 int_registers_saved = true;
10888 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10891 /* When using the red zone we may start register saving before allocating
10892 the stack frame, saving one cycle of the prologue. However, avoid
10893 doing this if we have to probe the stack; at least on x86_64 the
10894 stack probe can turn into a call that clobbers a red zone location. */
10895 else if (ix86_using_red_zone ()
10896 && (! TARGET_STACK_PROBE
10897 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10899 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10900 int_registers_saved = true;
10904 if (stack_realign_fp)
10906 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10907 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10909 /* The computation of the size of the re-aligned stack frame means
10910 that we must allocate the size of the register save area before
10911 performing the actual alignment. Otherwise we cannot guarantee
10912 that there's enough storage above the realignment point. */
10913 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10914 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10915 GEN_INT (m->fs.sp_offset
10916 - frame.sse_reg_save_offset),
10917 -1, false);
10919 /* Align the stack. */
10920 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10921 stack_pointer_rtx,
10922 GEN_INT (-align_bytes)));
10924 /* For the purposes of register save area addressing, the stack
10925 pointer is no longer valid. As for the value of sp_offset,
10926 see ix86_compute_frame_layout, which we need to match in order
10927 to pass verification of stack_pointer_offset at the end. */
10928 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10929 m->fs.sp_valid = false;
10932 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10934 if (flag_stack_usage_info)
10936 /* We start to count from ARG_POINTER. */
10937 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10939 /* If it was realigned, take into account the fake frame. */
10940 if (stack_realign_drap)
10942 if (ix86_static_chain_on_stack)
10943 stack_size += UNITS_PER_WORD;
10945 if (!call_used_regs[REGNO (crtl->drap_reg)])
10946 stack_size += UNITS_PER_WORD;
10948 /* This over-estimates by 1 minimal-stack-alignment-unit but
10949 mitigates that by counting in the new return address slot. */
10950 current_function_dynamic_stack_size
10951 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10954 current_function_static_stack_size = stack_size;
10957 /* On SEH target with very large frame size, allocate an area to save
10958 SSE registers (as the very large allocation won't be described). */
10959 if (TARGET_SEH
10960 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10961 && !sse_registers_saved)
10963 HOST_WIDE_INT sse_size =
10964 frame.sse_reg_save_offset - frame.reg_save_offset;
10966 gcc_assert (int_registers_saved);
10968 /* No need to do stack checking as the area will be immediately
10969 written. */
10970 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10971 GEN_INT (-sse_size), -1,
10972 m->fs.cfa_reg == stack_pointer_rtx);
10973 allocate -= sse_size;
10974 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10975 sse_registers_saved = true;
10978 /* The stack has already been decremented by the instruction calling us
10979 so probe if the size is non-negative to preserve the protection area. */
10980 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10982 /* We expect the registers to be saved when probes are used. */
10983 gcc_assert (int_registers_saved);
10985 if (STACK_CHECK_MOVING_SP)
10987 if (!(crtl->is_leaf && !cfun->calls_alloca
10988 && allocate <= PROBE_INTERVAL))
10990 ix86_adjust_stack_and_probe (allocate);
10991 allocate = 0;
10994 else
10996 HOST_WIDE_INT size = allocate;
10998 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10999 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11001 if (TARGET_STACK_PROBE)
11003 if (crtl->is_leaf && !cfun->calls_alloca)
11005 if (size > PROBE_INTERVAL)
11006 ix86_emit_probe_stack_range (0, size);
11008 else
11009 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11011 else
11013 if (crtl->is_leaf && !cfun->calls_alloca)
11015 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11016 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11017 size - STACK_CHECK_PROTECT);
11019 else
11020 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11025 if (allocate == 0)
11027 else if (!ix86_target_stack_probe ()
11028 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11030 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11031 GEN_INT (-allocate), -1,
11032 m->fs.cfa_reg == stack_pointer_rtx);
11034 else
11036 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11037 rtx r10 = NULL;
11038 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11039 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11040 bool eax_live = ix86_eax_live_at_start_p ();
11041 bool r10_live = false;
11043 if (TARGET_64BIT)
11044 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11046 if (eax_live)
11048 insn = emit_insn (gen_push (eax));
11049 allocate -= UNITS_PER_WORD;
11050 /* Note that SEH directives need to continue tracking the stack
11051 pointer even after the frame pointer has been set up. */
11052 if (sp_is_cfa_reg || TARGET_SEH)
11054 if (sp_is_cfa_reg)
11055 m->fs.cfa_offset += UNITS_PER_WORD;
11056 RTX_FRAME_RELATED_P (insn) = 1;
11060 if (r10_live)
11062 r10 = gen_rtx_REG (Pmode, R10_REG);
11063 insn = emit_insn (gen_push (r10));
11064 allocate -= UNITS_PER_WORD;
11065 if (sp_is_cfa_reg || TARGET_SEH)
11067 if (sp_is_cfa_reg)
11068 m->fs.cfa_offset += UNITS_PER_WORD;
11069 RTX_FRAME_RELATED_P (insn) = 1;
11073 emit_move_insn (eax, GEN_INT (allocate));
11074 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11076 /* Use the fact that AX still contains ALLOCATE. */
11077 adjust_stack_insn = (Pmode == DImode
11078 ? gen_pro_epilogue_adjust_stack_di_sub
11079 : gen_pro_epilogue_adjust_stack_si_sub);
11081 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11082 stack_pointer_rtx, eax));
11084 if (sp_is_cfa_reg || TARGET_SEH)
11086 if (sp_is_cfa_reg)
11087 m->fs.cfa_offset += allocate;
11088 RTX_FRAME_RELATED_P (insn) = 1;
11089 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11090 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11091 plus_constant (Pmode, stack_pointer_rtx,
11092 -allocate)));
11094 m->fs.sp_offset += allocate;
11096 /* Use stack_pointer_rtx for relative addressing so that code
11097 works for realigned stack, too. */
11098 if (r10_live && eax_live)
11100 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11101 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11102 gen_frame_mem (word_mode, t));
11103 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11104 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11105 gen_frame_mem (word_mode, t));
11107 else if (eax_live || r10_live)
11109 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11110 emit_move_insn (gen_rtx_REG (word_mode,
11111 (eax_live ? AX_REG : R10_REG)),
11112 gen_frame_mem (word_mode, t));
11115 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11117 /* If we haven't already set up the frame pointer, do so now. */
11118 if (frame_pointer_needed && !m->fs.fp_valid)
11120 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11121 GEN_INT (frame.stack_pointer_offset
11122 - frame.hard_frame_pointer_offset));
11123 insn = emit_insn (insn);
11124 RTX_FRAME_RELATED_P (insn) = 1;
11125 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11127 if (m->fs.cfa_reg == stack_pointer_rtx)
11128 m->fs.cfa_reg = hard_frame_pointer_rtx;
11129 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11130 m->fs.fp_valid = true;
11133 if (!int_registers_saved)
11134 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11135 if (!sse_registers_saved)
11136 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11138 pic_reg_used = false;
11139 /* We don't use pic-register for pe-coff target. */
11140 if (pic_offset_table_rtx
11141 && !TARGET_PECOFF
11142 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11143 || crtl->profile))
11145 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11147 if (alt_pic_reg_used != INVALID_REGNUM)
11148 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11150 pic_reg_used = true;
11153 if (pic_reg_used)
11155 if (TARGET_64BIT)
11157 if (ix86_cmodel == CM_LARGE_PIC)
11159 rtx label, tmp_reg;
11161 gcc_assert (Pmode == DImode);
11162 label = gen_label_rtx ();
11163 emit_label (label);
11164 LABEL_PRESERVE_P (label) = 1;
11165 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11166 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11167 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11168 label));
11169 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11170 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11171 pic_offset_table_rtx, tmp_reg));
11173 else
11174 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11176 else
11178 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11179 RTX_FRAME_RELATED_P (insn) = 1;
11180 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11184 /* In the pic_reg_used case, make sure that the got load isn't deleted
11185 when mcount needs it. Blockage to avoid call movement across mcount
11186 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11187 note. */
11188 if (crtl->profile && !flag_fentry && pic_reg_used)
11189 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11191 if (crtl->drap_reg && !crtl->stack_realign_needed)
11193 /* vDRAP is setup but after reload it turns out stack realign
11194 isn't necessary, here we will emit prologue to setup DRAP
11195 without stack realign adjustment */
11196 t = choose_baseaddr (0);
11197 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11200 /* Prevent instructions from being scheduled into register save push
11201 sequence when access to the redzone area is done through frame pointer.
11202 The offset between the frame pointer and the stack pointer is calculated
11203 relative to the value of the stack pointer at the end of the function
11204 prologue, and moving instructions that access the redzone area via the
11205 frame pointer into the push sequence violates this assumption. */
11206 if (frame_pointer_needed && frame.red_zone_size)
11207 emit_insn (gen_memory_blockage ());
11209 /* Emit cld instruction if stringops are used in the function. */
11210 if (TARGET_CLD && ix86_current_function_needs_cld)
11211 emit_insn (gen_cld ());
11213 /* SEH requires that the prologue end within 256 bytes of the start of
11214 the function. Prevent instruction schedules that would extend that.
11215 Further, prevent alloca modifications to the stack pointer from being
11216 combined with prologue modifications. */
11217 if (TARGET_SEH)
11218 emit_insn (gen_prologue_use (stack_pointer_rtx));
11221 /* Emit code to restore REG using a POP insn. */
11223 static void
11224 ix86_emit_restore_reg_using_pop (rtx reg)
11226 struct machine_function *m = cfun->machine;
11227 rtx insn = emit_insn (gen_pop (reg));
11229 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11230 m->fs.sp_offset -= UNITS_PER_WORD;
11232 if (m->fs.cfa_reg == crtl->drap_reg
11233 && REGNO (reg) == REGNO (crtl->drap_reg))
11235 /* Previously we'd represented the CFA as an expression
11236 like *(%ebp - 8). We've just popped that value from
11237 the stack, which means we need to reset the CFA to
11238 the drap register. This will remain until we restore
11239 the stack pointer. */
11240 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11241 RTX_FRAME_RELATED_P (insn) = 1;
11243 /* This means that the DRAP register is valid for addressing too. */
11244 m->fs.drap_valid = true;
11245 return;
11248 if (m->fs.cfa_reg == stack_pointer_rtx)
11250 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11251 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11252 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11253 RTX_FRAME_RELATED_P (insn) = 1;
11255 m->fs.cfa_offset -= UNITS_PER_WORD;
11258 /* When the frame pointer is the CFA, and we pop it, we are
11259 swapping back to the stack pointer as the CFA. This happens
11260 for stack frames that don't allocate other data, so we assume
11261 the stack pointer is now pointing at the return address, i.e.
11262 the function entry state, which makes the offset 1 word. */
11263 if (reg == hard_frame_pointer_rtx)
11265 m->fs.fp_valid = false;
11266 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11268 m->fs.cfa_reg = stack_pointer_rtx;
11269 m->fs.cfa_offset -= UNITS_PER_WORD;
11271 add_reg_note (insn, REG_CFA_DEF_CFA,
11272 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11273 GEN_INT (m->fs.cfa_offset)));
11274 RTX_FRAME_RELATED_P (insn) = 1;
11279 /* Emit code to restore saved registers using POP insns. */
11281 static void
11282 ix86_emit_restore_regs_using_pop (void)
11284 unsigned int regno;
11286 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11287 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11288 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11291 /* Emit code and notes for the LEAVE instruction. */
11293 static void
11294 ix86_emit_leave (void)
11296 struct machine_function *m = cfun->machine;
11297 rtx insn = emit_insn (ix86_gen_leave ());
11299 ix86_add_queued_cfa_restore_notes (insn);
11301 gcc_assert (m->fs.fp_valid);
11302 m->fs.sp_valid = true;
11303 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11304 m->fs.fp_valid = false;
11306 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11308 m->fs.cfa_reg = stack_pointer_rtx;
11309 m->fs.cfa_offset = m->fs.sp_offset;
11311 add_reg_note (insn, REG_CFA_DEF_CFA,
11312 plus_constant (Pmode, stack_pointer_rtx,
11313 m->fs.sp_offset));
11314 RTX_FRAME_RELATED_P (insn) = 1;
11316 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11317 m->fs.fp_offset);
11320 /* Emit code to restore saved registers using MOV insns.
11321 First register is restored from CFA - CFA_OFFSET. */
11322 static void
11323 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11324 bool maybe_eh_return)
11326 struct machine_function *m = cfun->machine;
11327 unsigned int regno;
11329 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11330 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11332 rtx reg = gen_rtx_REG (word_mode, regno);
11333 rtx insn, mem;
11335 mem = choose_baseaddr (cfa_offset);
11336 mem = gen_frame_mem (word_mode, mem);
11337 insn = emit_move_insn (reg, mem);
11339 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11341 /* Previously we'd represented the CFA as an expression
11342 like *(%ebp - 8). We've just popped that value from
11343 the stack, which means we need to reset the CFA to
11344 the drap register. This will remain until we restore
11345 the stack pointer. */
11346 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11347 RTX_FRAME_RELATED_P (insn) = 1;
11349 /* This means that the DRAP register is valid for addressing. */
11350 m->fs.drap_valid = true;
11352 else
11353 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11355 cfa_offset -= UNITS_PER_WORD;
11359 /* Emit code to restore saved registers using MOV insns.
11360 First register is restored from CFA - CFA_OFFSET. */
11361 static void
11362 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11363 bool maybe_eh_return)
11365 unsigned int regno;
11367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11368 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11370 rtx reg = gen_rtx_REG (V4SFmode, regno);
11371 rtx mem;
11373 mem = choose_baseaddr (cfa_offset);
11374 mem = gen_rtx_MEM (V4SFmode, mem);
11375 set_mem_align (mem, 128);
11376 emit_move_insn (reg, mem);
11378 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11380 cfa_offset -= 16;
11384 /* Restore function stack, frame, and registers. */
11386 void
11387 ix86_expand_epilogue (int style)
11389 struct machine_function *m = cfun->machine;
11390 struct machine_frame_state frame_state_save = m->fs;
11391 struct ix86_frame frame;
11392 bool restore_regs_via_mov;
11393 bool using_drap;
11395 ix86_finalize_stack_realign_flags ();
11396 ix86_compute_frame_layout (&frame);
11398 m->fs.sp_valid = (!frame_pointer_needed
11399 || (crtl->sp_is_unchanging
11400 && !stack_realign_fp));
11401 gcc_assert (!m->fs.sp_valid
11402 || m->fs.sp_offset == frame.stack_pointer_offset);
11404 /* The FP must be valid if the frame pointer is present. */
11405 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11406 gcc_assert (!m->fs.fp_valid
11407 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11409 /* We must have *some* valid pointer to the stack frame. */
11410 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11412 /* The DRAP is never valid at this point. */
11413 gcc_assert (!m->fs.drap_valid);
11415 /* See the comment about red zone and frame
11416 pointer usage in ix86_expand_prologue. */
11417 if (frame_pointer_needed && frame.red_zone_size)
11418 emit_insn (gen_memory_blockage ());
11420 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11421 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11423 /* Determine the CFA offset of the end of the red-zone. */
11424 m->fs.red_zone_offset = 0;
11425 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11427 /* The red-zone begins below the return address. */
11428 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11430 /* When the register save area is in the aligned portion of
11431 the stack, determine the maximum runtime displacement that
11432 matches up with the aligned frame. */
11433 if (stack_realign_drap)
11434 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11435 + UNITS_PER_WORD);
11438 /* Special care must be taken for the normal return case of a function
11439 using eh_return: the eax and edx registers are marked as saved, but
11440 not restored along this path. Adjust the save location to match. */
11441 if (crtl->calls_eh_return && style != 2)
11442 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11444 /* EH_RETURN requires the use of moves to function properly. */
11445 if (crtl->calls_eh_return)
11446 restore_regs_via_mov = true;
11447 /* SEH requires the use of pops to identify the epilogue. */
11448 else if (TARGET_SEH)
11449 restore_regs_via_mov = false;
11450 /* If we're only restoring one register and sp is not valid, then
11451 use a move instruction to restore the register, since it's
11452 less work than reloading sp and popping the register. */
11453 else if (!m->fs.sp_valid && frame.nregs <= 1)
11454 restore_regs_via_mov = true;
11455 else if (TARGET_EPILOGUE_USING_MOVE
11456 && cfun->machine->use_fast_prologue_epilogue
11457 && (frame.nregs > 1
11458 || m->fs.sp_offset != frame.reg_save_offset))
11459 restore_regs_via_mov = true;
11460 else if (frame_pointer_needed
11461 && !frame.nregs
11462 && m->fs.sp_offset != frame.reg_save_offset)
11463 restore_regs_via_mov = true;
11464 else if (frame_pointer_needed
11465 && TARGET_USE_LEAVE
11466 && cfun->machine->use_fast_prologue_epilogue
11467 && frame.nregs == 1)
11468 restore_regs_via_mov = true;
11469 else
11470 restore_regs_via_mov = false;
11472 if (restore_regs_via_mov || frame.nsseregs)
11474 /* Ensure that the entire register save area is addressable via
11475 the stack pointer, if we will restore via sp. */
11476 if (TARGET_64BIT
11477 && m->fs.sp_offset > 0x7fffffff
11478 && !(m->fs.fp_valid || m->fs.drap_valid)
11479 && (frame.nsseregs + frame.nregs) != 0)
11481 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11482 GEN_INT (m->fs.sp_offset
11483 - frame.sse_reg_save_offset),
11484 style,
11485 m->fs.cfa_reg == stack_pointer_rtx);
11489 /* If there are any SSE registers to restore, then we have to do it
11490 via moves, since there's obviously no pop for SSE regs. */
11491 if (frame.nsseregs)
11492 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11493 style == 2);
11495 if (restore_regs_via_mov)
11497 rtx t;
11499 if (frame.nregs)
11500 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11502 /* eh_return epilogues need %ecx added to the stack pointer. */
11503 if (style == 2)
11505 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11507 /* Stack align doesn't work with eh_return. */
11508 gcc_assert (!stack_realign_drap);
11509 /* Neither do regparm nested functions. */
11510 gcc_assert (!ix86_static_chain_on_stack);
11512 if (frame_pointer_needed)
11514 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11515 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11516 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11518 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11519 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11521 /* Note that we use SA as a temporary CFA, as the return
11522 address is at the proper place relative to it. We
11523 pretend this happens at the FP restore insn because
11524 prior to this insn the FP would be stored at the wrong
11525 offset relative to SA, and after this insn we have no
11526 other reasonable register to use for the CFA. We don't
11527 bother resetting the CFA to the SP for the duration of
11528 the return insn. */
11529 add_reg_note (insn, REG_CFA_DEF_CFA,
11530 plus_constant (Pmode, sa, UNITS_PER_WORD));
11531 ix86_add_queued_cfa_restore_notes (insn);
11532 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11533 RTX_FRAME_RELATED_P (insn) = 1;
11535 m->fs.cfa_reg = sa;
11536 m->fs.cfa_offset = UNITS_PER_WORD;
11537 m->fs.fp_valid = false;
11539 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11540 const0_rtx, style, false);
11542 else
11544 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11545 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11546 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11547 ix86_add_queued_cfa_restore_notes (insn);
11549 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11550 if (m->fs.cfa_offset != UNITS_PER_WORD)
11552 m->fs.cfa_offset = UNITS_PER_WORD;
11553 add_reg_note (insn, REG_CFA_DEF_CFA,
11554 plus_constant (Pmode, stack_pointer_rtx,
11555 UNITS_PER_WORD));
11556 RTX_FRAME_RELATED_P (insn) = 1;
11559 m->fs.sp_offset = UNITS_PER_WORD;
11560 m->fs.sp_valid = true;
11563 else
11565 /* SEH requires that the function end with (1) a stack adjustment
11566 if necessary, (2) a sequence of pops, and (3) a return or
11567 jump instruction. Prevent insns from the function body from
11568 being scheduled into this sequence. */
11569 if (TARGET_SEH)
11571 /* Prevent a catch region from being adjacent to the standard
11572 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11573 several other flags that would be interesting to test are
11574 set up yet. */
11575 if (flag_non_call_exceptions)
11576 emit_insn (gen_nops (const1_rtx));
11577 else
11578 emit_insn (gen_blockage ());
11581 /* First step is to deallocate the stack frame so that we can
11582 pop the registers. Also do it on SEH target for very large
11583 frame as the emitted instructions aren't allowed by the ABI in
11584 epilogues. */
11585 if (!m->fs.sp_valid
11586 || (TARGET_SEH
11587 && (m->fs.sp_offset - frame.reg_save_offset
11588 >= SEH_MAX_FRAME_SIZE)))
11590 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11591 GEN_INT (m->fs.fp_offset
11592 - frame.reg_save_offset),
11593 style, false);
11595 else if (m->fs.sp_offset != frame.reg_save_offset)
11597 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11598 GEN_INT (m->fs.sp_offset
11599 - frame.reg_save_offset),
11600 style,
11601 m->fs.cfa_reg == stack_pointer_rtx);
11604 ix86_emit_restore_regs_using_pop ();
11607 /* If we used a frame pointer and haven't already got rid of it,
11608 then do so now. */
11609 if (m->fs.fp_valid)
11611 /* If the stack pointer is valid and pointing at the frame
11612 pointer store address, then we only need a pop. */
11613 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11614 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11615 /* Leave results in shorter dependency chains on CPUs that are
11616 able to grok it fast. */
11617 else if (TARGET_USE_LEAVE
11618 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11619 || !cfun->machine->use_fast_prologue_epilogue)
11620 ix86_emit_leave ();
11621 else
11623 pro_epilogue_adjust_stack (stack_pointer_rtx,
11624 hard_frame_pointer_rtx,
11625 const0_rtx, style, !using_drap);
11626 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11630 if (using_drap)
11632 int param_ptr_offset = UNITS_PER_WORD;
11633 rtx insn;
11635 gcc_assert (stack_realign_drap);
11637 if (ix86_static_chain_on_stack)
11638 param_ptr_offset += UNITS_PER_WORD;
11639 if (!call_used_regs[REGNO (crtl->drap_reg)])
11640 param_ptr_offset += UNITS_PER_WORD;
11642 insn = emit_insn (gen_rtx_SET
11643 (VOIDmode, stack_pointer_rtx,
11644 gen_rtx_PLUS (Pmode,
11645 crtl->drap_reg,
11646 GEN_INT (-param_ptr_offset))));
11647 m->fs.cfa_reg = stack_pointer_rtx;
11648 m->fs.cfa_offset = param_ptr_offset;
11649 m->fs.sp_offset = param_ptr_offset;
11650 m->fs.realigned = false;
11652 add_reg_note (insn, REG_CFA_DEF_CFA,
11653 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11654 GEN_INT (param_ptr_offset)));
11655 RTX_FRAME_RELATED_P (insn) = 1;
11657 if (!call_used_regs[REGNO (crtl->drap_reg)])
11658 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11661 /* At this point the stack pointer must be valid, and we must have
11662 restored all of the registers. We may not have deallocated the
11663 entire stack frame. We've delayed this until now because it may
11664 be possible to merge the local stack deallocation with the
11665 deallocation forced by ix86_static_chain_on_stack. */
11666 gcc_assert (m->fs.sp_valid);
11667 gcc_assert (!m->fs.fp_valid);
11668 gcc_assert (!m->fs.realigned);
11669 if (m->fs.sp_offset != UNITS_PER_WORD)
11671 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11672 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11673 style, true);
11675 else
11676 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11678 /* Sibcall epilogues don't want a return instruction. */
11679 if (style == 0)
11681 m->fs = frame_state_save;
11682 return;
11685 if (crtl->args.pops_args && crtl->args.size)
11687 rtx popc = GEN_INT (crtl->args.pops_args);
11689 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11690 address, do explicit add, and jump indirectly to the caller. */
11692 if (crtl->args.pops_args >= 65536)
11694 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11695 rtx insn;
11697 /* There is no "pascal" calling convention in any 64bit ABI. */
11698 gcc_assert (!TARGET_64BIT);
11700 insn = emit_insn (gen_pop (ecx));
11701 m->fs.cfa_offset -= UNITS_PER_WORD;
11702 m->fs.sp_offset -= UNITS_PER_WORD;
11704 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11705 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11706 add_reg_note (insn, REG_CFA_REGISTER,
11707 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11708 RTX_FRAME_RELATED_P (insn) = 1;
11710 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11711 popc, -1, true);
11712 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11714 else
11715 emit_jump_insn (gen_simple_return_pop_internal (popc));
11717 else
11718 emit_jump_insn (gen_simple_return_internal ());
11720 /* Restore the state back to the state from the prologue,
11721 so that it's correct for the next epilogue. */
11722 m->fs = frame_state_save;
11725 /* Reset from the function's potential modifications. */
11727 static void
11728 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11729 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11731 if (pic_offset_table_rtx)
11732 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11733 #if TARGET_MACHO
11734 /* Mach-O doesn't support labels at the end of objects, so if
11735 it looks like we might want one, insert a NOP. */
11737 rtx insn = get_last_insn ();
11738 rtx deleted_debug_label = NULL_RTX;
11739 while (insn
11740 && NOTE_P (insn)
11741 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11743 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11744 notes only, instead set their CODE_LABEL_NUMBER to -1,
11745 otherwise there would be code generation differences
11746 in between -g and -g0. */
11747 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11748 deleted_debug_label = insn;
11749 insn = PREV_INSN (insn);
11751 if (insn
11752 && (LABEL_P (insn)
11753 || (NOTE_P (insn)
11754 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11755 fputs ("\tnop\n", file);
11756 else if (deleted_debug_label)
11757 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11758 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11759 CODE_LABEL_NUMBER (insn) = -1;
11761 #endif
11765 /* Return a scratch register to use in the split stack prologue. The
11766 split stack prologue is used for -fsplit-stack. It consists of the
11767 first instructions in the function, even before the regular prologue.
11768 The scratch register can be any caller-saved register which is not
11769 used for parameters or for the static chain. */
11771 static unsigned int
11772 split_stack_prologue_scratch_regno (void)
11774 if (TARGET_64BIT)
11775 return R11_REG;
11776 else
11778 bool is_fastcall, is_thiscall;
11779 int regparm;
11781 is_fastcall = (lookup_attribute ("fastcall",
11782 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11783 != NULL);
11784 is_thiscall = (lookup_attribute ("thiscall",
11785 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11786 != NULL);
11787 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11789 if (is_fastcall)
11791 if (DECL_STATIC_CHAIN (cfun->decl))
11793 sorry ("-fsplit-stack does not support fastcall with "
11794 "nested function");
11795 return INVALID_REGNUM;
11797 return AX_REG;
11799 else if (is_thiscall)
11801 if (!DECL_STATIC_CHAIN (cfun->decl))
11802 return DX_REG;
11803 return AX_REG;
11805 else if (regparm < 3)
11807 if (!DECL_STATIC_CHAIN (cfun->decl))
11808 return CX_REG;
11809 else
11811 if (regparm >= 2)
11813 sorry ("-fsplit-stack does not support 2 register "
11814 " parameters for a nested function");
11815 return INVALID_REGNUM;
11817 return DX_REG;
11820 else
11822 /* FIXME: We could make this work by pushing a register
11823 around the addition and comparison. */
11824 sorry ("-fsplit-stack does not support 3 register parameters");
11825 return INVALID_REGNUM;
11830 /* A SYMBOL_REF for the function which allocates new stackspace for
11831 -fsplit-stack. */
11833 static GTY(()) rtx split_stack_fn;
11835 /* A SYMBOL_REF for the more stack function when using the large
11836 model. */
11838 static GTY(()) rtx split_stack_fn_large;
11840 /* Handle -fsplit-stack. These are the first instructions in the
11841 function, even before the regular prologue. */
11843 void
11844 ix86_expand_split_stack_prologue (void)
11846 struct ix86_frame frame;
11847 HOST_WIDE_INT allocate;
11848 unsigned HOST_WIDE_INT args_size;
11849 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11850 rtx scratch_reg = NULL_RTX;
11851 rtx varargs_label = NULL_RTX;
11852 rtx fn;
11854 gcc_assert (flag_split_stack && reload_completed);
11856 ix86_finalize_stack_realign_flags ();
11857 ix86_compute_frame_layout (&frame);
11858 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11860 /* This is the label we will branch to if we have enough stack
11861 space. We expect the basic block reordering pass to reverse this
11862 branch if optimizing, so that we branch in the unlikely case. */
11863 label = gen_label_rtx ();
11865 /* We need to compare the stack pointer minus the frame size with
11866 the stack boundary in the TCB. The stack boundary always gives
11867 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11868 can compare directly. Otherwise we need to do an addition. */
11870 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11871 UNSPEC_STACK_CHECK);
11872 limit = gen_rtx_CONST (Pmode, limit);
11873 limit = gen_rtx_MEM (Pmode, limit);
11874 if (allocate < SPLIT_STACK_AVAILABLE)
11875 current = stack_pointer_rtx;
11876 else
11878 unsigned int scratch_regno;
11879 rtx offset;
11881 /* We need a scratch register to hold the stack pointer minus
11882 the required frame size. Since this is the very start of the
11883 function, the scratch register can be any caller-saved
11884 register which is not used for parameters. */
11885 offset = GEN_INT (- allocate);
11886 scratch_regno = split_stack_prologue_scratch_regno ();
11887 if (scratch_regno == INVALID_REGNUM)
11888 return;
11889 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11890 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11892 /* We don't use ix86_gen_add3 in this case because it will
11893 want to split to lea, but when not optimizing the insn
11894 will not be split after this point. */
11895 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11896 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11897 offset)));
11899 else
11901 emit_move_insn (scratch_reg, offset);
11902 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11903 stack_pointer_rtx));
11905 current = scratch_reg;
11908 ix86_expand_branch (GEU, current, limit, label);
11909 jump_insn = get_last_insn ();
11910 JUMP_LABEL (jump_insn) = label;
11912 /* Mark the jump as very likely to be taken. */
11913 add_int_reg_note (jump_insn, REG_BR_PROB,
11914 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11916 if (split_stack_fn == NULL_RTX)
11917 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11918 fn = split_stack_fn;
11920 /* Get more stack space. We pass in the desired stack space and the
11921 size of the arguments to copy to the new stack. In 32-bit mode
11922 we push the parameters; __morestack will return on a new stack
11923 anyhow. In 64-bit mode we pass the parameters in r10 and
11924 r11. */
11925 allocate_rtx = GEN_INT (allocate);
11926 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11927 call_fusage = NULL_RTX;
11928 if (TARGET_64BIT)
11930 rtx reg10, reg11;
11932 reg10 = gen_rtx_REG (Pmode, R10_REG);
11933 reg11 = gen_rtx_REG (Pmode, R11_REG);
11935 /* If this function uses a static chain, it will be in %r10.
11936 Preserve it across the call to __morestack. */
11937 if (DECL_STATIC_CHAIN (cfun->decl))
11939 rtx rax;
11941 rax = gen_rtx_REG (word_mode, AX_REG);
11942 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11943 use_reg (&call_fusage, rax);
11946 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11947 && !TARGET_PECOFF)
11949 HOST_WIDE_INT argval;
11951 gcc_assert (Pmode == DImode);
11952 /* When using the large model we need to load the address
11953 into a register, and we've run out of registers. So we
11954 switch to a different calling convention, and we call a
11955 different function: __morestack_large. We pass the
11956 argument size in the upper 32 bits of r10 and pass the
11957 frame size in the lower 32 bits. */
11958 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11959 gcc_assert ((args_size & 0xffffffff) == args_size);
11961 if (split_stack_fn_large == NULL_RTX)
11962 split_stack_fn_large =
11963 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11965 if (ix86_cmodel == CM_LARGE_PIC)
11967 rtx label, x;
11969 label = gen_label_rtx ();
11970 emit_label (label);
11971 LABEL_PRESERVE_P (label) = 1;
11972 emit_insn (gen_set_rip_rex64 (reg10, label));
11973 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11974 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11975 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11976 UNSPEC_GOT);
11977 x = gen_rtx_CONST (Pmode, x);
11978 emit_move_insn (reg11, x);
11979 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11980 x = gen_const_mem (Pmode, x);
11981 emit_move_insn (reg11, x);
11983 else
11984 emit_move_insn (reg11, split_stack_fn_large);
11986 fn = reg11;
11988 argval = ((args_size << 16) << 16) + allocate;
11989 emit_move_insn (reg10, GEN_INT (argval));
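/* Worked example with hypothetical numbers, not part of the compiler
   source: for args_size == 0x20 and allocate == 0x1000 the computation
   above yields argval == 0x0000002000001000, i.e. the argument size ends
   up in the upper 32 bits of %r10 and the frame size in the lower 32
   bits, as the __morestack_large_model convention described above
   requires.  */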
11991 else
11993 emit_move_insn (reg10, allocate_rtx);
11994 emit_move_insn (reg11, GEN_INT (args_size));
11995 use_reg (&call_fusage, reg11);
11998 use_reg (&call_fusage, reg10);
12000 else
12002 emit_insn (gen_push (GEN_INT (args_size)));
12003 emit_insn (gen_push (allocate_rtx));
12005 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12006 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12007 NULL_RTX, false);
12008 add_function_usage_to (call_insn, call_fusage);
12010 /* In order to make call/return prediction work right, we now need
12011 to execute a return instruction. See
12012 libgcc/config/i386/morestack.S for the details on how this works.
12014 For flow purposes gcc must not see this as a return
12015 instruction--we need control flow to continue at the subsequent
12016 label. Therefore, we use an unspec. */
12017 gcc_assert (crtl->args.pops_args < 65536);
12018 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12020 /* If we are in 64-bit mode and this function uses a static chain,
12021 we saved %r10 in %rax before calling __morestack. */
12022 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12023 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12024 gen_rtx_REG (word_mode, AX_REG));
12026 /* If this function calls va_start, we need to store a pointer to
12027 the arguments on the old stack, because they may not have been
12028 all copied to the new stack. At this point the old stack can be
12029 found at the frame pointer value used by __morestack, because
12030 __morestack has set that up before calling back to us. Here we
12031 store that pointer in a scratch register, and in
12032 ix86_expand_prologue we store the scratch register in a stack
12033 slot. */
12034 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12036 unsigned int scratch_regno;
12037 rtx frame_reg;
12038 int words;
12040 scratch_regno = split_stack_prologue_scratch_regno ();
12041 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12042 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12044 /* 64-bit:
12045 fp -> old fp value
12046 return address within this function
12047 return address of caller of this function
12048 stack arguments
12049 So we add three words to get to the stack arguments.
12051 32-bit:
12052 fp -> old fp value
12053 return address within this function
12054 first argument to __morestack
12055 second argument to __morestack
12056 return address of caller of this function
12057 stack arguments
12058 So we add five words to get to the stack arguments.
12060 words = TARGET_64BIT ? 3 : 5;
12061 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12062 gen_rtx_PLUS (Pmode, frame_reg,
12063 GEN_INT (words * UNITS_PER_WORD))));
12065 varargs_label = gen_label_rtx ();
12066 emit_jump_insn (gen_jump (varargs_label));
12067 JUMP_LABEL (get_last_insn ()) = varargs_label;
12069 emit_barrier ();
12072 emit_label (label);
12073 LABEL_NUSES (label) = 1;
12075 /* If this function calls va_start, we now have to set the scratch
12076 register for the case where we do not call __morestack. In this
12077 case we need to set it based on the stack pointer. */
12078 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12080 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12081 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12082 GEN_INT (UNITS_PER_WORD))));
12084 emit_label (varargs_label);
12085 LABEL_NUSES (varargs_label) = 1;
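/* Illustrative sketch, not part of the compiler source (label names, the
   exact TCB slot and the register choices are hypothetical): for a 64-bit
   function whose frame fits below SPLIT_STACK_AVAILABLE, the code
   expanded above corresponds roughly to:

       cmpq    %fs:<stack-limit slot>, %rsp
       jae     .Lenough
       movq    $<frame size>, %r10
       movq    $<argument size>, %r11
       callq   __morestack
       ret                     # special split-stack return
   .Lenough:
       # regular prologue follows

   so the slow path calls __morestack only when the current stack segment
   does not have enough room.  */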
12089 /* We may have to tell the dataflow pass that the split stack prologue
12090 is initializing a scratch register. */
12092 static void
12093 ix86_live_on_entry (bitmap regs)
12095 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12097 gcc_assert (flag_split_stack);
12098 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12102 /* Extract the parts of an RTL expression that is a valid memory address
12103 for an instruction. Return 0 if the structure of the address is
12104 grossly off. Return -1 if the address contains ASHIFT, so it is not
12105 strictly valid, but still used for computing the length of the lea instruction. */
12108 ix86_decompose_address (rtx addr, struct ix86_address *out)
12110 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12111 rtx base_reg, index_reg;
12112 HOST_WIDE_INT scale = 1;
12113 rtx scale_rtx = NULL_RTX;
12114 rtx tmp;
12115 int retval = 1;
12116 enum ix86_address_seg seg = SEG_DEFAULT;
12118 /* Allow zero-extended SImode addresses,
12119 they will be emitted with addr32 prefix. */
12120 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12122 if (GET_CODE (addr) == ZERO_EXTEND
12123 && GET_MODE (XEXP (addr, 0)) == SImode)
12125 addr = XEXP (addr, 0);
12126 if (CONST_INT_P (addr))
12127 return 0;
12129 else if (GET_CODE (addr) == AND
12130 && const_32bit_mask (XEXP (addr, 1), DImode))
12132 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12133 if (addr == NULL_RTX)
12134 return 0;
12136 if (CONST_INT_P (addr))
12137 return 0;
12141 /* Allow SImode subregs of DImode addresses,
12142 they will be emitted with addr32 prefix. */
12143 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12145 if (GET_CODE (addr) == SUBREG
12146 && GET_MODE (SUBREG_REG (addr)) == DImode)
12148 addr = SUBREG_REG (addr);
12149 if (CONST_INT_P (addr))
12150 return 0;
12154 if (REG_P (addr))
12155 base = addr;
12156 else if (GET_CODE (addr) == SUBREG)
12158 if (REG_P (SUBREG_REG (addr)))
12159 base = addr;
12160 else
12161 return 0;
12163 else if (GET_CODE (addr) == PLUS)
12165 rtx addends[4], op;
12166 int n = 0, i;
12168 op = addr;
12171 if (n >= 4)
12172 return 0;
12173 addends[n++] = XEXP (op, 1);
12174 op = XEXP (op, 0);
12176 while (GET_CODE (op) == PLUS);
12177 if (n >= 4)
12178 return 0;
12179 addends[n] = op;
12181 for (i = n; i >= 0; --i)
12183 op = addends[i];
12184 switch (GET_CODE (op))
12186 case MULT:
12187 if (index)
12188 return 0;
12189 index = XEXP (op, 0);
12190 scale_rtx = XEXP (op, 1);
12191 break;
12193 case ASHIFT:
12194 if (index)
12195 return 0;
12196 index = XEXP (op, 0);
12197 tmp = XEXP (op, 1);
12198 if (!CONST_INT_P (tmp))
12199 return 0;
12200 scale = INTVAL (tmp);
12201 if ((unsigned HOST_WIDE_INT) scale > 3)
12202 return 0;
12203 scale = 1 << scale;
12204 break;
12206 case ZERO_EXTEND:
12207 op = XEXP (op, 0);
12208 if (GET_CODE (op) != UNSPEC)
12209 return 0;
12210 /* FALLTHRU */
12212 case UNSPEC:
12213 if (XINT (op, 1) == UNSPEC_TP
12214 && TARGET_TLS_DIRECT_SEG_REFS
12215 && seg == SEG_DEFAULT)
12216 seg = DEFAULT_TLS_SEG_REG;
12217 else
12218 return 0;
12219 break;
12221 case SUBREG:
12222 if (!REG_P (SUBREG_REG (op)))
12223 return 0;
12224 /* FALLTHRU */
12226 case REG:
12227 if (!base)
12228 base = op;
12229 else if (!index)
12230 index = op;
12231 else
12232 return 0;
12233 break;
12235 case CONST:
12236 case CONST_INT:
12237 case SYMBOL_REF:
12238 case LABEL_REF:
12239 if (disp)
12240 return 0;
12241 disp = op;
12242 break;
12244 default:
12245 return 0;
12249 else if (GET_CODE (addr) == MULT)
12251 index = XEXP (addr, 0); /* index*scale */
12252 scale_rtx = XEXP (addr, 1);
12254 else if (GET_CODE (addr) == ASHIFT)
12256 /* We're called for lea too, which implements ashift on occasion. */
12257 index = XEXP (addr, 0);
12258 tmp = XEXP (addr, 1);
12259 if (!CONST_INT_P (tmp))
12260 return 0;
12261 scale = INTVAL (tmp);
12262 if ((unsigned HOST_WIDE_INT) scale > 3)
12263 return 0;
12264 scale = 1 << scale;
12265 retval = -1;
12267 else
12268 disp = addr; /* displacement */
12270 if (index)
12272 if (REG_P (index))
12274 else if (GET_CODE (index) == SUBREG
12275 && REG_P (SUBREG_REG (index)))
12277 else
12278 return 0;
12281 /* Extract the integral value of scale. */
12282 if (scale_rtx)
12284 if (!CONST_INT_P (scale_rtx))
12285 return 0;
12286 scale = INTVAL (scale_rtx);
12289 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12290 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12292 /* Avoid useless 0 displacement. */
12293 if (disp == const0_rtx && (base || index))
12294 disp = NULL_RTX;
12296 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12297 if (base_reg && index_reg && scale == 1
12298 && (index_reg == arg_pointer_rtx
12299 || index_reg == frame_pointer_rtx
12300 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12302 rtx tmp;
12303 tmp = base, base = index, index = tmp;
12304 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12307 /* Special case: %ebp cannot be encoded as a base without a displacement.
12308 Similarly %r13. */
12309 if (!disp
12310 && base_reg
12311 && (base_reg == hard_frame_pointer_rtx
12312 || base_reg == frame_pointer_rtx
12313 || base_reg == arg_pointer_rtx
12314 || (REG_P (base_reg)
12315 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12316 || REGNO (base_reg) == R13_REG))))
12317 disp = const0_rtx;
12319 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12320 Avoid this by transforming to [%esi+0].
12321 Reload calls address legitimization without cfun defined, so we need
12322 to test cfun for being non-NULL. */
12323 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12324 && base_reg && !index_reg && !disp
12325 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12326 disp = const0_rtx;
12328 /* Special case: encode reg+reg instead of reg*2. */
12329 if (!base && index && scale == 2)
12330 base = index, base_reg = index_reg, scale = 1;
12332 /* Special case: scaling cannot be encoded without base or displacement. */
12333 if (!base && !disp && index && scale != 1)
12334 disp = const0_rtx;
12336 out->base = base;
12337 out->index = index;
12338 out->disp = disp;
12339 out->scale = scale;
12340 out->seg = seg;
12342 return retval;
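
/* A minimal illustrative sketch (the helper name and register choices
   below are hypothetical, not used elsewhere): decomposing a typical
   base + index*scale + disp address with ix86_decompose_address.  */

static void ATTRIBUTE_UNUSED
ix86_decompose_address_example (void)
{
  struct ix86_address parts;
  rtx base = gen_rtx_REG (Pmode, AX_REG);
  rtx index = gen_rtx_REG (Pmode, SI_REG);
  /* (plus (plus (mult index 4) base) 8), i.e. disp 8 with scale 4.  */
  rtx addr = gen_rtx_PLUS (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, index,
                                                       GEN_INT (4)),
                                         base),
                           GEN_INT (8));
  int ok = ix86_decompose_address (addr, &parts);

  /* Expect a strictly valid decomposition into the four parts.  */
  gcc_assert (ok == 1
              && parts.base == base
              && parts.index == index
              && parts.scale == 4
              && CONST_INT_P (parts.disp)
              && INTVAL (parts.disp) == 8);
}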
12345 /* Return cost of the memory address x.
12346 For i386, it is better to use a complex address than let gcc copy
12347 the address into a reg and make a new pseudo. But not if the address
12348 requires two regs - that would mean more pseudos with longer
12349 lifetimes. */
12350 static int
12351 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12352 addr_space_t as ATTRIBUTE_UNUSED,
12353 bool speed ATTRIBUTE_UNUSED)
12355 struct ix86_address parts;
12356 int cost = 1;
12357 int ok = ix86_decompose_address (x, &parts);
12359 gcc_assert (ok);
12361 if (parts.base && GET_CODE (parts.base) == SUBREG)
12362 parts.base = SUBREG_REG (parts.base);
12363 if (parts.index && GET_CODE (parts.index) == SUBREG)
12364 parts.index = SUBREG_REG (parts.index);
12366 /* Attempt to minimize number of registers in the address. */
12367 if ((parts.base
12368 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12369 || (parts.index
12370 && (!REG_P (parts.index)
12371 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12372 cost++;
12374 if (parts.base
12375 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12376 && parts.index
12377 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12378 && parts.base != parts.index)
12379 cost++;
12381 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12382 since its predecode logic can't detect the length of such instructions
12383 and decoding degenerates to vector decoding. Increase the cost of such
12384 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12385 to split such addresses or even refuse such addresses at all.
12387 The following addressing modes are affected:
12388 [base+scale*index]
12389 [scale*index+disp]
12390 [base+index]
12392 The first and last cases may be avoidable by explicitly coding a zero
12393 displacement in the memory address, but I don't have an AMD-K6 machine
12394 handy to check this theory. */
12396 if (TARGET_K6
12397 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12398 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12399 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12400 cost += 10;
12402 return cost;
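
/* Worked example of the cost rules above (illustrative only): an address
   whose base and index are both hard registers costs 1; if either part is
   a pseudo (or not a register at all) the cost becomes 2; if base and
   index are two distinct pseudos it becomes 3; and on the K6 an affected
   addressing mode adds a further 10.  */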
12405 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12406 this is used to form addresses to local data when -fPIC is in
12407 use. */
12409 static bool
12410 darwin_local_data_pic (rtx disp)
12412 return (GET_CODE (disp) == UNSPEC
12413 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12416 /* Determine if a given RTX is a valid constant. We already know this
12417 satisfies CONSTANT_P. */
12419 static bool
12420 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12422 switch (GET_CODE (x))
12424 case CONST:
12425 x = XEXP (x, 0);
12427 if (GET_CODE (x) == PLUS)
12429 if (!CONST_INT_P (XEXP (x, 1)))
12430 return false;
12431 x = XEXP (x, 0);
12434 if (TARGET_MACHO && darwin_local_data_pic (x))
12435 return true;
12437 /* Only some unspecs are valid as "constants". */
12438 if (GET_CODE (x) == UNSPEC)
12439 switch (XINT (x, 1))
12441 case UNSPEC_GOT:
12442 case UNSPEC_GOTOFF:
12443 case UNSPEC_PLTOFF:
12444 return TARGET_64BIT;
12445 case UNSPEC_TPOFF:
12446 case UNSPEC_NTPOFF:
12447 x = XVECEXP (x, 0, 0);
12448 return (GET_CODE (x) == SYMBOL_REF
12449 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12450 case UNSPEC_DTPOFF:
12451 x = XVECEXP (x, 0, 0);
12452 return (GET_CODE (x) == SYMBOL_REF
12453 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12454 default:
12455 return false;
12458 /* We must have drilled down to a symbol. */
12459 if (GET_CODE (x) == LABEL_REF)
12460 return true;
12461 if (GET_CODE (x) != SYMBOL_REF)
12462 return false;
12463 /* FALLTHRU */
12465 case SYMBOL_REF:
12466 /* TLS symbols are never valid. */
12467 if (SYMBOL_REF_TLS_MODEL (x))
12468 return false;
12470 /* DLLIMPORT symbols are never valid. */
12471 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12472 && SYMBOL_REF_DLLIMPORT_P (x))
12473 return false;
12475 #if TARGET_MACHO
12476 /* mdynamic-no-pic */
12477 if (MACHO_DYNAMIC_NO_PIC_P)
12478 return machopic_symbol_defined_p (x);
12479 #endif
12480 break;
12482 case CONST_DOUBLE:
12483 if (GET_MODE (x) == TImode
12484 && x != CONST0_RTX (TImode)
12485 && !TARGET_64BIT)
12486 return false;
12487 break;
12489 case CONST_VECTOR:
12490 if (!standard_sse_constant_p (x))
12491 return false;
12493 default:
12494 break;
12497 /* Otherwise we handle everything else in the move patterns. */
12498 return true;
12501 /* Determine if it's legal to put X into the constant pool. This
12502 is not possible for the address of thread-local symbols, which
12503 is checked above. */
12505 static bool
12506 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12508 /* We can always put integral constants and vectors in memory. */
12509 switch (GET_CODE (x))
12511 case CONST_INT:
12512 case CONST_DOUBLE:
12513 case CONST_VECTOR:
12514 return false;
12516 default:
12517 break;
12519 return !ix86_legitimate_constant_p (mode, x);
12522 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12523 otherwise zero. */
12525 static bool
12526 is_imported_p (rtx x)
12528 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12529 || GET_CODE (x) != SYMBOL_REF)
12530 return false;
12532 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12536 /* Nonzero if the constant value X is a legitimate general operand
12537 when generating PIC code. It is given that flag_pic is on and
12538 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12540 bool
12541 legitimate_pic_operand_p (rtx x)
12543 rtx inner;
12545 switch (GET_CODE (x))
12547 case CONST:
12548 inner = XEXP (x, 0);
12549 if (GET_CODE (inner) == PLUS
12550 && CONST_INT_P (XEXP (inner, 1)))
12551 inner = XEXP (inner, 0);
12553 /* Only some unspecs are valid as "constants". */
12554 if (GET_CODE (inner) == UNSPEC)
12555 switch (XINT (inner, 1))
12557 case UNSPEC_GOT:
12558 case UNSPEC_GOTOFF:
12559 case UNSPEC_PLTOFF:
12560 return TARGET_64BIT;
12561 case UNSPEC_TPOFF:
12562 x = XVECEXP (inner, 0, 0);
12563 return (GET_CODE (x) == SYMBOL_REF
12564 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12565 case UNSPEC_MACHOPIC_OFFSET:
12566 return legitimate_pic_address_disp_p (x);
12567 default:
12568 return false;
12570 /* FALLTHRU */
12572 case SYMBOL_REF:
12573 case LABEL_REF:
12574 return legitimate_pic_address_disp_p (x);
12576 default:
12577 return true;
12581 /* Determine if a given CONST RTX is a valid memory displacement
12582 in PIC mode. */
12584 bool
12585 legitimate_pic_address_disp_p (rtx disp)
12587 bool saw_plus;
12589 /* In 64bit mode we can allow direct addresses of symbols and labels
12590 when they are not dynamic symbols. */
12591 if (TARGET_64BIT)
12593 rtx op0 = disp, op1;
12595 switch (GET_CODE (disp))
12597 case LABEL_REF:
12598 return true;
12600 case CONST:
12601 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12602 break;
12603 op0 = XEXP (XEXP (disp, 0), 0);
12604 op1 = XEXP (XEXP (disp, 0), 1);
12605 if (!CONST_INT_P (op1)
12606 || INTVAL (op1) >= 16*1024*1024
12607 || INTVAL (op1) < -16*1024*1024)
12608 break;
12609 if (GET_CODE (op0) == LABEL_REF)
12610 return true;
12611 if (GET_CODE (op0) == CONST
12612 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12613 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12614 return true;
12615 if (GET_CODE (op0) == UNSPEC
12616 && XINT (op0, 1) == UNSPEC_PCREL)
12617 return true;
12618 if (GET_CODE (op0) != SYMBOL_REF)
12619 break;
12620 /* FALLTHRU */
12622 case SYMBOL_REF:
12623 /* TLS references should always be enclosed in UNSPEC.
12624 A dllimported symbol always needs to be resolved. */
12625 if (SYMBOL_REF_TLS_MODEL (op0)
12626 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12627 return false;
12629 if (TARGET_PECOFF)
12631 if (is_imported_p (op0))
12632 return true;
12634 if (SYMBOL_REF_FAR_ADDR_P (op0)
12635 || !SYMBOL_REF_LOCAL_P (op0))
12636 break;
12638 /* Function symbols need to be resolved only for
12639 the large model.
12640 For the small model we don't need to resolve anything
12641 here. */
12642 if ((ix86_cmodel != CM_LARGE_PIC
12643 && SYMBOL_REF_FUNCTION_P (op0))
12644 || ix86_cmodel == CM_SMALL_PIC)
12645 return true;
12646 /* Non-external symbols don't need to be resolved for
12647 the large and medium models. */
12648 if ((ix86_cmodel == CM_LARGE_PIC
12649 || ix86_cmodel == CM_MEDIUM_PIC)
12650 && !SYMBOL_REF_EXTERNAL_P (op0))
12651 return true;
12653 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12654 && SYMBOL_REF_LOCAL_P (op0)
12655 && ix86_cmodel != CM_LARGE_PIC)
12656 return true;
12657 break;
12659 default:
12660 break;
12663 if (GET_CODE (disp) != CONST)
12664 return false;
12665 disp = XEXP (disp, 0);
12667 if (TARGET_64BIT)
12669 /* It is unsafe to allow PLUS expressions; this would limit the allowed
12670 distance of GOT tables. We should not need these anyway. */
12671 if (GET_CODE (disp) != UNSPEC
12672 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12673 && XINT (disp, 1) != UNSPEC_GOTOFF
12674 && XINT (disp, 1) != UNSPEC_PCREL
12675 && XINT (disp, 1) != UNSPEC_PLTOFF))
12676 return false;
12678 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12679 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12680 return false;
12681 return true;
12684 saw_plus = false;
12685 if (GET_CODE (disp) == PLUS)
12687 if (!CONST_INT_P (XEXP (disp, 1)))
12688 return false;
12689 disp = XEXP (disp, 0);
12690 saw_plus = true;
12693 if (TARGET_MACHO && darwin_local_data_pic (disp))
12694 return true;
12696 if (GET_CODE (disp) != UNSPEC)
12697 return false;
12699 switch (XINT (disp, 1))
12701 case UNSPEC_GOT:
12702 if (saw_plus)
12703 return false;
12704 /* We need to check for both symbols and labels because VxWorks loads
12705 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12706 details. */
12707 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12708 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12709 case UNSPEC_GOTOFF:
12710 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12711 While the ABI also specifies a 32bit relocation, we don't produce it in
12712 the small PIC model at all. */
12713 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12714 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12715 && !TARGET_64BIT)
12716 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12717 return false;
12718 case UNSPEC_GOTTPOFF:
12719 case UNSPEC_GOTNTPOFF:
12720 case UNSPEC_INDNTPOFF:
12721 if (saw_plus)
12722 return false;
12723 disp = XVECEXP (disp, 0, 0);
12724 return (GET_CODE (disp) == SYMBOL_REF
12725 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12726 case UNSPEC_NTPOFF:
12727 disp = XVECEXP (disp, 0, 0);
12728 return (GET_CODE (disp) == SYMBOL_REF
12729 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12730 case UNSPEC_DTPOFF:
12731 disp = XVECEXP (disp, 0, 0);
12732 return (GET_CODE (disp) == SYMBOL_REF
12733 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12736 return false;
12739 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12740 replace the input X, or the original X if no replacement is called for.
12741 The output parameter *WIN is 1 if the calling macro should goto WIN,
12742 0 if it should not. */
12744 bool
12745 ix86_legitimize_reload_address (rtx x,
12746 enum machine_mode mode ATTRIBUTE_UNUSED,
12747 int opnum, int type,
12748 int ind_levels ATTRIBUTE_UNUSED)
12750 /* Reload can generate:
12752 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12753 (reg:DI 97))
12754 (reg:DI 2 cx))
12756 This RTX is rejected by ix86_legitimate_address_p due to
12757 non-strictness of base register 97. Following this rejection,
12758 reload pushes all three components into separate registers,
12759 creating an invalid memory address RTX.
12761 The following code reloads only the invalid part of the
12762 memory address RTX. */
12764 if (GET_CODE (x) == PLUS
12765 && REG_P (XEXP (x, 1))
12766 && GET_CODE (XEXP (x, 0)) == PLUS
12767 && REG_P (XEXP (XEXP (x, 0), 1)))
12769 rtx base, index;
12770 bool something_reloaded = false;
12772 base = XEXP (XEXP (x, 0), 1);
12773 if (!REG_OK_FOR_BASE_STRICT_P (base))
12775 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12776 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12777 opnum, (enum reload_type) type);
12778 something_reloaded = true;
12781 index = XEXP (x, 1);
12782 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12784 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12785 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12786 opnum, (enum reload_type) type);
12787 something_reloaded = true;
12790 gcc_assert (something_reloaded);
12791 return true;
12794 return false;
12797 /* Determine if OP is a suitable RTX for an address register.
12798 Return the naked register if a register or a register subreg is
12799 found, otherwise return NULL_RTX. */
12801 static rtx
12802 ix86_validate_address_register (rtx op)
12804 enum machine_mode mode = GET_MODE (op);
12806 /* Only SImode or DImode registers can form the address. */
12807 if (mode != SImode && mode != DImode)
12808 return NULL_RTX;
12810 if (REG_P (op))
12811 return op;
12812 else if (GET_CODE (op) == SUBREG)
12814 rtx reg = SUBREG_REG (op);
12816 if (!REG_P (reg))
12817 return NULL_RTX;
12819 mode = GET_MODE (reg);
12821 /* Don't allow SUBREGs that span more than a word. It can
12822 lead to spill failures when the register is one word out
12823 of a two word structure. */
12824 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12825 return NULL_RTX;
12827 /* Allow only SUBREGs of non-eliminable hard registers. */
12828 if (register_no_elim_operand (reg, mode))
12829 return reg;
12832 /* Op is not a register. */
12833 return NULL_RTX;
12836 /* Recognizes RTL expressions that are valid memory addresses for an
12837 instruction. The MODE argument is the machine mode for the MEM
12838 expression that wants to use this address.
12840 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12841 convert common non-canonical forms to canonical form so that they will
12842 be recognized. */
12844 static bool
12845 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12846 rtx addr, bool strict)
12848 struct ix86_address parts;
12849 rtx base, index, disp;
12850 HOST_WIDE_INT scale;
12851 enum ix86_address_seg seg;
12853 if (ix86_decompose_address (addr, &parts) <= 0)
12854 /* Decomposition failed. */
12855 return false;
12857 base = parts.base;
12858 index = parts.index;
12859 disp = parts.disp;
12860 scale = parts.scale;
12861 seg = parts.seg;
12863 /* Validate base register. */
12864 if (base)
12866 rtx reg = ix86_validate_address_register (base);
12868 if (reg == NULL_RTX)
12869 return false;
12871 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12872 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12873 /* Base is not valid. */
12874 return false;
12877 /* Validate index register. */
12878 if (index)
12880 rtx reg = ix86_validate_address_register (index);
12882 if (reg == NULL_RTX)
12883 return false;
12885 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12886 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12887 /* Index is not valid. */
12888 return false;
12891 /* Index and base should have the same mode. */
12892 if (base && index
12893 && GET_MODE (base) != GET_MODE (index))
12894 return false;
12896 /* Address override works only on the (%reg) part of %fs:(%reg). */
12897 if (seg != SEG_DEFAULT
12898 && ((base && GET_MODE (base) != word_mode)
12899 || (index && GET_MODE (index) != word_mode)))
12900 return false;
12902 /* Validate scale factor. */
12903 if (scale != 1)
12905 if (!index)
12906 /* Scale without index. */
12907 return false;
12909 if (scale != 2 && scale != 4 && scale != 8)
12910 /* Scale is not a valid multiplier. */
12911 return false;
12914 /* Validate displacement. */
12915 if (disp)
12917 if (GET_CODE (disp) == CONST
12918 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12919 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12920 switch (XINT (XEXP (disp, 0), 1))
12922 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12923 used. While the ABI also specifies 32bit relocations, we don't produce
12924 them at all and use IP-relative addressing instead. */
12925 case UNSPEC_GOT:
12926 case UNSPEC_GOTOFF:
12927 gcc_assert (flag_pic);
12928 if (!TARGET_64BIT)
12929 goto is_legitimate_pic;
12931 /* 64bit address unspec. */
12932 return false;
12934 case UNSPEC_GOTPCREL:
12935 case UNSPEC_PCREL:
12936 gcc_assert (flag_pic);
12937 goto is_legitimate_pic;
12939 case UNSPEC_GOTTPOFF:
12940 case UNSPEC_GOTNTPOFF:
12941 case UNSPEC_INDNTPOFF:
12942 case UNSPEC_NTPOFF:
12943 case UNSPEC_DTPOFF:
12944 break;
12946 case UNSPEC_STACK_CHECK:
12947 gcc_assert (flag_split_stack);
12948 break;
12950 default:
12951 /* Invalid address unspec. */
12952 return false;
12955 else if (SYMBOLIC_CONST (disp)
12956 && (flag_pic
12957 || (TARGET_MACHO
12958 #if TARGET_MACHO
12959 && MACHOPIC_INDIRECT
12960 && !machopic_operand_p (disp)
12961 #endif
12965 is_legitimate_pic:
12966 if (TARGET_64BIT && (index || base))
12968 /* foo@dtpoff(%rX) is ok. */
12969 if (GET_CODE (disp) != CONST
12970 || GET_CODE (XEXP (disp, 0)) != PLUS
12971 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12972 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12973 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12974 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12975 /* Non-constant pic memory reference. */
12976 return false;
12978 else if ((!TARGET_MACHO || flag_pic)
12979 && ! legitimate_pic_address_disp_p (disp))
12980 /* Displacement is an invalid pic construct. */
12981 return false;
12982 #if TARGET_MACHO
12983 else if (MACHO_DYNAMIC_NO_PIC_P
12984 && !ix86_legitimate_constant_p (Pmode, disp))
12985 /* Displacement must be referenced via non_lazy_pointer. */
12986 return false;
12987 #endif
12989 /* This code used to verify that a symbolic pic displacement
12990 includes the pic_offset_table_rtx register.
12992 While this is a good idea, unfortunately these constructs may
12993 be created by the "adds using lea" optimization for incorrect
12994 code like:
12996 int a;
12997 int foo(int i)
12999 return *(&a+i);
13002 This code is nonsensical, but results in addressing the
13003 GOT table with pic_offset_table_rtx as the base. We can't
13004 just refuse it easily, since it gets matched by the
13005 "addsi3" pattern, which later gets split to lea when the
13006 output register differs from the input. While this
13007 could be handled by a separate addsi pattern for this case
13008 that never results in lea, disabling this test seems to be
13009 the easier and correct fix for the crash. */
13011 else if (GET_CODE (disp) != LABEL_REF
13012 && !CONST_INT_P (disp)
13013 && (GET_CODE (disp) != CONST
13014 || !ix86_legitimate_constant_p (Pmode, disp))
13015 && (GET_CODE (disp) != SYMBOL_REF
13016 || !ix86_legitimate_constant_p (Pmode, disp)))
13017 /* Displacement is not constant. */
13018 return false;
13019 else if (TARGET_64BIT
13020 && !x86_64_immediate_operand (disp, VOIDmode))
13021 /* Displacement is out of range. */
13022 return false;
13023 /* In x32 mode, constant addresses are sign extended to 64bit, so
13024 we have to reject addresses from 0x80000000 to 0xffffffff. */
13025 else if (TARGET_X32 && !(index || base)
13026 && CONST_INT_P (disp)
13027 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13028 return false;
13031 /* Everything looks valid. */
13032 return true;
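
/* Illustrative examples of how the checks above classify a few addresses
   (registers assumed to be valid word_mode address registers):

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
        base + index*4 + 8; accepted.
     (plus (reg) (mult (reg) (const_int 3)))
        scale 3 is not 2, 4 or 8; rejected.
     (mult (reg) (const_int 4))
        scale with neither base nor displacement; ix86_decompose_address
        supplies a zero displacement, so it is accepted.  */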
13035 /* Determine if a given RTX is a valid constant address. */
13037 bool
13038 constant_address_p (rtx x)
13040 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13043 /* Return a unique alias set for the GOT. */
13045 static alias_set_type
13046 ix86_GOT_alias_set (void)
13048 static alias_set_type set = -1;
13049 if (set == -1)
13050 set = new_alias_set ();
13051 return set;
13054 /* Return a legitimate reference for ORIG (an address) using the
13055 register REG. If REG is 0, a new pseudo is generated.
13057 There are two types of references that must be handled:
13059 1. Global data references must load the address from the GOT, via
13060 the PIC reg. An insn is emitted to do this load, and the reg is
13061 returned.
13063 2. Static data references, constant pool addresses, and code labels
13064 compute the address as an offset from the GOT, whose base is in
13065 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13066 differentiate them from global data objects. The returned
13067 address is the PIC reg + an unspec constant.
13069 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13070 reg also appears in the address. */
13072 static rtx
13073 legitimize_pic_address (rtx orig, rtx reg)
13075 rtx addr = orig;
13076 rtx new_rtx = orig;
13078 #if TARGET_MACHO
13079 if (TARGET_MACHO && !TARGET_64BIT)
13081 if (reg == 0)
13082 reg = gen_reg_rtx (Pmode);
13083 /* Use the generic Mach-O PIC machinery. */
13084 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13086 #endif
13088 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13090 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13091 if (tmp)
13092 return tmp;
13095 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13096 new_rtx = addr;
13097 else if (TARGET_64BIT && !TARGET_PECOFF
13098 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13100 rtx tmpreg;
13101 /* This symbol may be referenced via a displacement from the PIC
13102 base address (@GOTOFF). */
13104 if (reload_in_progress)
13105 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13106 if (GET_CODE (addr) == CONST)
13107 addr = XEXP (addr, 0);
13108 if (GET_CODE (addr) == PLUS)
13110 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13111 UNSPEC_GOTOFF);
13112 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13114 else
13115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13117 if (!reg)
13118 tmpreg = gen_reg_rtx (Pmode);
13119 else
13120 tmpreg = reg;
13121 emit_move_insn (tmpreg, new_rtx);
13123 if (reg != 0)
13125 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13126 tmpreg, 1, OPTAB_DIRECT);
13127 new_rtx = reg;
13129 else
13130 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13132 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13134 /* This symbol may be referenced via a displacement from the PIC
13135 base address (@GOTOFF). */
13137 if (reload_in_progress)
13138 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13139 if (GET_CODE (addr) == CONST)
13140 addr = XEXP (addr, 0);
13141 if (GET_CODE (addr) == PLUS)
13143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13144 UNSPEC_GOTOFF);
13145 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13147 else
13148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13150 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13152 if (reg != 0)
13154 emit_move_insn (reg, new_rtx);
13155 new_rtx = reg;
13158 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13159 /* We can't use @GOTOFF for text labels on VxWorks;
13160 see gotoff_operand. */
13161 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13163 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13164 if (tmp)
13165 return tmp;
13167 /* For x64 PE-COFF there is no GOT table, so we use the address
13168 directly. */
13169 if (TARGET_64BIT && TARGET_PECOFF)
13171 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13172 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13174 if (reg == 0)
13175 reg = gen_reg_rtx (Pmode);
13176 emit_move_insn (reg, new_rtx);
13177 new_rtx = reg;
13179 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183 new_rtx = gen_const_mem (Pmode, new_rtx);
13184 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13186 if (reg == 0)
13187 reg = gen_reg_rtx (Pmode);
13188 /* Use gen_movsi directly, otherwise the address is loaded
13189 into a register for CSE. We don't want to CSE these addresses;
13190 instead we CSE addresses from the GOT table, so skip this. */
13191 emit_insn (gen_movsi (reg, new_rtx));
13192 new_rtx = reg;
13194 else
13196 /* This symbol must be referenced via a load from the
13197 Global Offset Table (@GOT). */
13199 if (reload_in_progress)
13200 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13201 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13202 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13203 if (TARGET_64BIT)
13204 new_rtx = force_reg (Pmode, new_rtx);
13205 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13206 new_rtx = gen_const_mem (Pmode, new_rtx);
13207 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13209 if (reg == 0)
13210 reg = gen_reg_rtx (Pmode);
13211 emit_move_insn (reg, new_rtx);
13212 new_rtx = reg;
13215 else
13217 if (CONST_INT_P (addr)
13218 && !x86_64_immediate_operand (addr, VOIDmode))
13220 if (reg)
13222 emit_move_insn (reg, addr);
13223 new_rtx = reg;
13225 else
13226 new_rtx = force_reg (Pmode, addr);
13228 else if (GET_CODE (addr) == CONST)
13230 addr = XEXP (addr, 0);
13232 /* We must match stuff we generate before. Assume the only
13233 unspecs that can get here are ours. Not that we could do
13234 anything with them anyway.... */
13235 if (GET_CODE (addr) == UNSPEC
13236 || (GET_CODE (addr) == PLUS
13237 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13238 return orig;
13239 gcc_assert (GET_CODE (addr) == PLUS);
13241 if (GET_CODE (addr) == PLUS)
13243 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13245 /* Check first to see if this is a constant offset from a @GOTOFF
13246 symbol reference. */
13247 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13248 && CONST_INT_P (op1))
13250 if (!TARGET_64BIT)
13252 if (reload_in_progress)
13253 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13254 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13255 UNSPEC_GOTOFF);
13256 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13257 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13258 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13260 if (reg != 0)
13262 emit_move_insn (reg, new_rtx);
13263 new_rtx = reg;
13266 else
13268 if (INTVAL (op1) < -16*1024*1024
13269 || INTVAL (op1) >= 16*1024*1024)
13271 if (!x86_64_immediate_operand (op1, Pmode))
13272 op1 = force_reg (Pmode, op1);
13273 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13277 else
13279 rtx base = legitimize_pic_address (op0, reg);
13280 enum machine_mode mode = GET_MODE (base);
13281 new_rtx
13282 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13284 if (CONST_INT_P (new_rtx))
13286 if (INTVAL (new_rtx) < -16*1024*1024
13287 || INTVAL (new_rtx) >= 16*1024*1024)
13289 if (!x86_64_immediate_operand (new_rtx, mode))
13290 new_rtx = force_reg (mode, new_rtx);
13291 new_rtx
13292 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13294 else
13295 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13297 else
13299 if (GET_CODE (new_rtx) == PLUS
13300 && CONSTANT_P (XEXP (new_rtx, 1)))
13302 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13303 new_rtx = XEXP (new_rtx, 1);
13305 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13310 return new_rtx;
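
/* Illustrative sketch of the two basic shapes produced above for 32-bit
   ELF PIC, for hypothetical symbols "lvar" (local) and "gvar" (global):

     lvar:  (plus pic_offset_table_rtx
                  (const (unspec [lvar] UNSPEC_GOTOFF)))
     gvar:  (mem (plus pic_offset_table_rtx
                       (const (unspec [gvar] UNSPEC_GOT))))

   i.e. lvar@GOTOFF(%ebx) computes the address directly, while
   gvar@GOT(%ebx) loads it from the GOT.  The @GOT load is always copied
   into a register (REG or a fresh pseudo); the @GOTOFF sum is copied
   into REG only when one was supplied.  */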
13313 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13315 static rtx
13316 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13318 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13320 if (GET_MODE (tp) != tp_mode)
13322 gcc_assert (GET_MODE (tp) == SImode);
13323 gcc_assert (tp_mode == DImode);
13325 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13328 if (to_reg)
13329 tp = copy_to_mode_reg (tp_mode, tp);
13331 return tp;
13334 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13336 static GTY(()) rtx ix86_tls_symbol;
13338 static rtx
13339 ix86_tls_get_addr (void)
13341 if (!ix86_tls_symbol)
13343 const char *sym
13344 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13345 ? "___tls_get_addr" : "__tls_get_addr");
13347 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13350 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13352 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13353 UNSPEC_PLTOFF);
13354 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13355 gen_rtx_CONST (Pmode, unspec));
13358 return ix86_tls_symbol;
13361 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13363 static GTY(()) rtx ix86_tls_module_base_symbol;
13366 ix86_tls_module_base (void)
13368 if (!ix86_tls_module_base_symbol)
13370 ix86_tls_module_base_symbol
13371 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13373 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13374 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13377 return ix86_tls_module_base_symbol;
13380 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13381 false if we expect this to be used for a memory address and true if
13382 we expect to load the address into a register. */
13384 static rtx
13385 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13387 rtx dest, base, off;
13388 rtx pic = NULL_RTX, tp = NULL_RTX;
13389 enum machine_mode tp_mode = Pmode;
13390 int type;
13392 switch (model)
13394 case TLS_MODEL_GLOBAL_DYNAMIC:
13395 dest = gen_reg_rtx (Pmode);
13397 if (!TARGET_64BIT)
13399 if (flag_pic && !TARGET_PECOFF)
13400 pic = pic_offset_table_rtx;
13401 else
13403 pic = gen_reg_rtx (Pmode);
13404 emit_insn (gen_set_got (pic));
13408 if (TARGET_GNU2_TLS)
13410 if (TARGET_64BIT)
13411 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13412 else
13413 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13415 tp = get_thread_pointer (Pmode, true);
13416 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13418 if (GET_MODE (x) != Pmode)
13419 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13421 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13423 else
13425 rtx caddr = ix86_tls_get_addr ();
13427 if (TARGET_64BIT)
13429 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13430 rtx insns;
13432 start_sequence ();
13433 emit_call_insn
13434 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13435 insns = get_insns ();
13436 end_sequence ();
13438 if (GET_MODE (x) != Pmode)
13439 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13441 RTL_CONST_CALL_P (insns) = 1;
13442 emit_libcall_block (insns, dest, rax, x);
13444 else
13445 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13447 break;
13449 case TLS_MODEL_LOCAL_DYNAMIC:
13450 base = gen_reg_rtx (Pmode);
13452 if (!TARGET_64BIT)
13454 if (flag_pic)
13455 pic = pic_offset_table_rtx;
13456 else
13458 pic = gen_reg_rtx (Pmode);
13459 emit_insn (gen_set_got (pic));
13463 if (TARGET_GNU2_TLS)
13465 rtx tmp = ix86_tls_module_base ();
13467 if (TARGET_64BIT)
13468 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13469 else
13470 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13472 tp = get_thread_pointer (Pmode, true);
13473 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13474 gen_rtx_MINUS (Pmode, tmp, tp));
13476 else
13478 rtx caddr = ix86_tls_get_addr ();
13480 if (TARGET_64BIT)
13482 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13483 rtx insns, eqv;
13485 start_sequence ();
13486 emit_call_insn
13487 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13488 insns = get_insns ();
13489 end_sequence ();
13491 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13492 share the LD_BASE result with other LD model accesses. */
13493 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13494 UNSPEC_TLS_LD_BASE);
13496 RTL_CONST_CALL_P (insns) = 1;
13497 emit_libcall_block (insns, base, rax, eqv);
13499 else
13500 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13503 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13504 off = gen_rtx_CONST (Pmode, off);
13506 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13508 if (TARGET_GNU2_TLS)
13510 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13512 if (GET_MODE (x) != Pmode)
13513 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13515 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13517 break;
13519 case TLS_MODEL_INITIAL_EXEC:
13520 if (TARGET_64BIT)
13522 if (TARGET_SUN_TLS && !TARGET_X32)
13524 /* The Sun linker took the AMD64 TLS spec literally
13525 and can only handle %rax as destination of the
13526 initial executable code sequence. */
13528 dest = gen_reg_rtx (DImode);
13529 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13530 return dest;
13533 /* Generate DImode references to avoid %fs:(%reg32)
13534 problems and the linker IE->LE relaxation bug. */
13535 tp_mode = DImode;
13536 pic = NULL;
13537 type = UNSPEC_GOTNTPOFF;
13539 else if (flag_pic)
13541 if (reload_in_progress)
13542 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13543 pic = pic_offset_table_rtx;
13544 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13546 else if (!TARGET_ANY_GNU_TLS)
13548 pic = gen_reg_rtx (Pmode);
13549 emit_insn (gen_set_got (pic));
13550 type = UNSPEC_GOTTPOFF;
13552 else
13554 pic = NULL;
13555 type = UNSPEC_INDNTPOFF;
13558 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13559 off = gen_rtx_CONST (tp_mode, off);
13560 if (pic)
13561 off = gen_rtx_PLUS (tp_mode, pic, off);
13562 off = gen_const_mem (tp_mode, off);
13563 set_mem_alias_set (off, ix86_GOT_alias_set ());
13565 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13567 base = get_thread_pointer (tp_mode,
13568 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13569 off = force_reg (tp_mode, off);
13570 return gen_rtx_PLUS (tp_mode, base, off);
13572 else
13574 base = get_thread_pointer (Pmode, true);
13575 dest = gen_reg_rtx (Pmode);
13576 emit_insn (ix86_gen_sub3 (dest, base, off));
13578 break;
13580 case TLS_MODEL_LOCAL_EXEC:
13581 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13582 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13583 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13584 off = gen_rtx_CONST (Pmode, off);
13586 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13588 base = get_thread_pointer (Pmode,
13589 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13590 return gen_rtx_PLUS (Pmode, base, off);
13592 else
13594 base = get_thread_pointer (Pmode, true);
13595 dest = gen_reg_rtx (Pmode);
13596 emit_insn (ix86_gen_sub3 (dest, base, off));
13598 break;
13600 default:
13601 gcc_unreachable ();
13604 return dest;
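
/* Illustrative sketch of the results above for the exec models under GNU
   TLS, for a hypothetical TLS symbol "x": local exec returns

     (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF)))

   which ends up as a segment-relative x@tpoff (or x@ntpoff) reference,
   while initial exec first loads (mem (const (unspec [x] UNSPEC_GOTNTPOFF)))
   (x@gottpoff(%rip) on 64-bit) into a register and adds it to the thread
   pointer.  The dynamic models instead emit a call to __tls_get_addr or
   use the GNU2 TLS descriptor sequence.  */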
13607 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13608 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13609 unique refptr-DECL symbol corresponding to symbol DECL. */
13611 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13612 htab_t dllimport_map;
13614 static tree
13615 get_dllimport_decl (tree decl, bool beimport)
13617 struct tree_map *h, in;
13618 void **loc;
13619 const char *name;
13620 const char *prefix;
13621 size_t namelen, prefixlen;
13622 char *imp_name;
13623 tree to;
13624 rtx rtl;
13626 if (!dllimport_map)
13627 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13629 in.hash = htab_hash_pointer (decl);
13630 in.base.from = decl;
13631 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13632 h = (struct tree_map *) *loc;
13633 if (h)
13634 return h->to;
13636 *loc = h = ggc_alloc_tree_map ();
13637 h->hash = in.hash;
13638 h->base.from = decl;
13639 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13640 VAR_DECL, NULL, ptr_type_node);
13641 DECL_ARTIFICIAL (to) = 1;
13642 DECL_IGNORED_P (to) = 1;
13643 DECL_EXTERNAL (to) = 1;
13644 TREE_READONLY (to) = 1;
13646 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13647 name = targetm.strip_name_encoding (name);
13648 if (beimport)
13649 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13650 ? "*__imp_" : "*__imp__";
13651 else
13652 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13653 namelen = strlen (name);
13654 prefixlen = strlen (prefix);
13655 imp_name = (char *) alloca (namelen + prefixlen + 1);
13656 memcpy (imp_name, prefix, prefixlen);
13657 memcpy (imp_name + prefixlen, name, namelen + 1);
13659 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13660 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13661 SET_SYMBOL_REF_DECL (rtl, to);
13662 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13663 if (!beimport)
13665 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13666 #ifdef SUB_TARGET_RECORD_STUB
13667 SUB_TARGET_RECORD_STUB (name);
13668 #endif
13671 rtl = gen_const_mem (Pmode, rtl);
13672 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13674 SET_DECL_RTL (to, rtl);
13675 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13677 return to;
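
/* Example of the names built above, for a hypothetical dllimport
   declaration "foo": the BEIMPORT case produces the assembler name
   "*__imp_foo" (or "*__imp__foo" when a user label prefix is in use, as
   on 32-bit mingw); the !BEIMPORT case produces the stub variable
   "*.refptr.foo" (or "*refptr.foo").  The leading '*' keeps any further
   prefixing from being applied, and the returned VAR_DECL's DECL_RTL is
   a constant memory reference through that symbol.  */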
13680 /* Expand SYMBOL into its corresponding far-address symbol.
13681 WANT_REG is true if we require the result be a register. */
13683 static rtx
13684 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13686 tree imp_decl;
13687 rtx x;
13689 gcc_assert (SYMBOL_REF_DECL (symbol));
13690 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13692 x = DECL_RTL (imp_decl);
13693 if (want_reg)
13694 x = force_reg (Pmode, x);
13695 return x;
13698 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13699 true if we require the result be a register. */
13701 static rtx
13702 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13704 tree imp_decl;
13705 rtx x;
13707 gcc_assert (SYMBOL_REF_DECL (symbol));
13708 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13710 x = DECL_RTL (imp_decl);
13711 if (want_reg)
13712 x = force_reg (Pmode, x);
13713 return x;
13716 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13717 is true if we require the result be a register. */
13719 static rtx
13720 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13722 if (!TARGET_PECOFF)
13723 return NULL_RTX;
13725 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13727 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13728 return legitimize_dllimport_symbol (addr, inreg);
13729 if (GET_CODE (addr) == CONST
13730 && GET_CODE (XEXP (addr, 0)) == PLUS
13731 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13732 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13734 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13735 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13739 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13740 return NULL_RTX;
13741 if (GET_CODE (addr) == SYMBOL_REF
13742 && !is_imported_p (addr)
13743 && SYMBOL_REF_EXTERNAL_P (addr)
13744 && SYMBOL_REF_DECL (addr))
13745 return legitimize_pe_coff_extern_decl (addr, inreg);
13747 if (GET_CODE (addr) == CONST
13748 && GET_CODE (XEXP (addr, 0)) == PLUS
13749 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13750 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13751 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13752 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13754 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13755 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13757 return NULL_RTX;
13760 /* Try machine-dependent ways of modifying an illegitimate address
13761 to be legitimate. If we find one, return the new, valid address.
13762 This macro is used in only one place: `memory_address' in explow.c.
13764 OLDX is the address as it was before break_out_memory_refs was called.
13765 In some cases it is useful to look at this to decide what needs to be done.
13767 It is always safe for this macro to do nothing. It exists to recognize
13768 opportunities to optimize the output.
13770 For the 80386, we handle X+REG by loading X into a register R and
13771 using R+REG. R will go in a general reg and indexing will be used.
13772 However, if REG is a broken-out memory address or multiplication,
13773 nothing needs to be done because REG can certainly go in a general reg.
13775 When -fpic is used, special handling is needed for symbolic references.
13776 See comments by legitimize_pic_address in i386.c for details. */
13778 static rtx
13779 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13780 enum machine_mode mode)
13782 int changed = 0;
13783 unsigned log;
13785 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13786 if (log)
13787 return legitimize_tls_address (x, (enum tls_model) log, false);
13788 if (GET_CODE (x) == CONST
13789 && GET_CODE (XEXP (x, 0)) == PLUS
13790 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13791 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13793 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13794 (enum tls_model) log, false);
13795 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13798 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13800 rtx tmp = legitimize_pe_coff_symbol (x, true);
13801 if (tmp)
13802 return tmp;
13805 if (flag_pic && SYMBOLIC_CONST (x))
13806 return legitimize_pic_address (x, 0);
13808 #if TARGET_MACHO
13809 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13810 return machopic_indirect_data_reference (x, 0);
13811 #endif
13813 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13814 if (GET_CODE (x) == ASHIFT
13815 && CONST_INT_P (XEXP (x, 1))
13816 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13818 changed = 1;
13819 log = INTVAL (XEXP (x, 1));
13820 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13821 GEN_INT (1 << log));
13824 if (GET_CODE (x) == PLUS)
13826 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13828 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13829 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13830 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13832 changed = 1;
13833 log = INTVAL (XEXP (XEXP (x, 0), 1));
13834 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13835 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13836 GEN_INT (1 << log));
13839 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13840 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13841 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13843 changed = 1;
13844 log = INTVAL (XEXP (XEXP (x, 1), 1));
13845 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13846 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13847 GEN_INT (1 << log));
13850 /* Put multiply first if it isn't already. */
13851 if (GET_CODE (XEXP (x, 1)) == MULT)
13853 rtx tmp = XEXP (x, 0);
13854 XEXP (x, 0) = XEXP (x, 1);
13855 XEXP (x, 1) = tmp;
13856 changed = 1;
13859 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13860 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13861 created by virtual register instantiation, register elimination, and
13862 similar optimizations. */
13863 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13865 changed = 1;
13866 x = gen_rtx_PLUS (Pmode,
13867 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13868 XEXP (XEXP (x, 1), 0)),
13869 XEXP (XEXP (x, 1), 1));
13872 /* Canonicalize
13873 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13874 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13875 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13876 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13877 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13878 && CONSTANT_P (XEXP (x, 1)))
13880 rtx constant;
13881 rtx other = NULL_RTX;
13883 if (CONST_INT_P (XEXP (x, 1)))
13885 constant = XEXP (x, 1);
13886 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13888 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13890 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13891 other = XEXP (x, 1);
13893 else
13894 constant = 0;
13896 if (constant)
13898 changed = 1;
13899 x = gen_rtx_PLUS (Pmode,
13900 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13901 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13902 plus_constant (Pmode, other,
13903 INTVAL (constant)));
13907 if (changed && ix86_legitimate_address_p (mode, x, false))
13908 return x;
13910 if (GET_CODE (XEXP (x, 0)) == MULT)
13912 changed = 1;
13913 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13916 if (GET_CODE (XEXP (x, 1)) == MULT)
13918 changed = 1;
13919 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13922 if (changed
13923 && REG_P (XEXP (x, 1))
13924 && REG_P (XEXP (x, 0)))
13925 return x;
13927 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13929 changed = 1;
13930 x = legitimize_pic_address (x, 0);
13933 if (changed && ix86_legitimate_address_p (mode, x, false))
13934 return x;
13936 if (REG_P (XEXP (x, 0)))
13938 rtx temp = gen_reg_rtx (Pmode);
13939 rtx val = force_operand (XEXP (x, 1), temp);
13940 if (val != temp)
13942 val = convert_to_mode (Pmode, val, 1);
13943 emit_move_insn (temp, val);
13946 XEXP (x, 1) = temp;
13947 return x;
13950 else if (REG_P (XEXP (x, 1)))
13952 rtx temp = gen_reg_rtx (Pmode);
13953 rtx val = force_operand (XEXP (x, 0), temp);
13954 if (val != temp)
13956 val = convert_to_mode (Pmode, val, 1);
13957 emit_move_insn (temp, val);
13960 XEXP (x, 0) = temp;
13961 return x;
13965 return x;
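
/* Illustrative example of the canonicalization above: given
   (plus (ashift (reg A) (const_int 2)) (reg B)) with A and B valid
   address registers, the shift is rewritten as a multiply and
   (plus (mult (reg A) (const_int 4)) (reg B)) is returned, which
   ix86_decompose_address then accepts as index*4 + base.  */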
13968 /* Print an integer constant expression in assembler syntax. Addition
13969 and subtraction are the only arithmetic that may appear in these
13970 expressions. FILE is the stdio stream to write to, X is the rtx, and
13971 CODE is the operand print code from the output string. */
13973 static void
13974 output_pic_addr_const (FILE *file, rtx x, int code)
13976 char buf[256];
13978 switch (GET_CODE (x))
13980 case PC:
13981 gcc_assert (flag_pic);
13982 putc ('.', file);
13983 break;
13985 case SYMBOL_REF:
13986 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13987 output_addr_const (file, x);
13988 else
13990 const char *name = XSTR (x, 0);
13992 /* Mark the decl as referenced so that cgraph will
13993 output the function. */
13994 if (SYMBOL_REF_DECL (x))
13995 mark_decl_referenced (SYMBOL_REF_DECL (x));
13997 #if TARGET_MACHO
13998 if (MACHOPIC_INDIRECT
13999 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14000 name = machopic_indirection_name (x, /*stub_p=*/true);
14001 #endif
14002 assemble_name (file, name);
14004 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14005 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14006 fputs ("@PLT", file);
14007 break;
14009 case LABEL_REF:
14010 x = XEXP (x, 0);
14011 /* FALLTHRU */
14012 case CODE_LABEL:
14013 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14014 assemble_name (asm_out_file, buf);
14015 break;
14017 case CONST_INT:
14018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14019 break;
14021 case CONST:
14022 /* This used to output parentheses around the expression,
14023 but that does not work on the 386 (either ATT or BSD assembler). */
14024 output_pic_addr_const (file, XEXP (x, 0), code);
14025 break;
14027 case CONST_DOUBLE:
14028 if (GET_MODE (x) == VOIDmode)
14030 /* We can use %d if the number is <32 bits and positive. */
14031 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14032 fprintf (file, "0x%lx%08lx",
14033 (unsigned long) CONST_DOUBLE_HIGH (x),
14034 (unsigned long) CONST_DOUBLE_LOW (x));
14035 else
14036 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14038 else
14039 /* We can't handle floating point constants;
14040 TARGET_PRINT_OPERAND must handle them. */
14041 output_operand_lossage ("floating constant misused");
14042 break;
14044 case PLUS:
14045 /* Some assemblers need integer constants to appear first. */
14046 if (CONST_INT_P (XEXP (x, 0)))
14048 output_pic_addr_const (file, XEXP (x, 0), code);
14049 putc ('+', file);
14050 output_pic_addr_const (file, XEXP (x, 1), code);
14052 else
14054 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14055 output_pic_addr_const (file, XEXP (x, 1), code);
14056 putc ('+', file);
14057 output_pic_addr_const (file, XEXP (x, 0), code);
14059 break;
14061 case MINUS:
14062 if (!TARGET_MACHO)
14063 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14064 output_pic_addr_const (file, XEXP (x, 0), code);
14065 putc ('-', file);
14066 output_pic_addr_const (file, XEXP (x, 1), code);
14067 if (!TARGET_MACHO)
14068 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14069 break;
14071 case UNSPEC:
14072 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14074 bool f = i386_asm_output_addr_const_extra (file, x);
14075 gcc_assert (f);
14076 break;
14079 gcc_assert (XVECLEN (x, 0) == 1);
14080 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14081 switch (XINT (x, 1))
14083 case UNSPEC_GOT:
14084 fputs ("@GOT", file);
14085 break;
14086 case UNSPEC_GOTOFF:
14087 fputs ("@GOTOFF", file);
14088 break;
14089 case UNSPEC_PLTOFF:
14090 fputs ("@PLTOFF", file);
14091 break;
14092 case UNSPEC_PCREL:
14093 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14094 "(%rip)" : "[rip]", file);
14095 break;
14096 case UNSPEC_GOTPCREL:
14097 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14098 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14099 break;
14100 case UNSPEC_GOTTPOFF:
14101 /* FIXME: This might be @TPOFF in Sun ld too. */
14102 fputs ("@gottpoff", file);
14103 break;
14104 case UNSPEC_TPOFF:
14105 fputs ("@tpoff", file);
14106 break;
14107 case UNSPEC_NTPOFF:
14108 if (TARGET_64BIT)
14109 fputs ("@tpoff", file);
14110 else
14111 fputs ("@ntpoff", file);
14112 break;
14113 case UNSPEC_DTPOFF:
14114 fputs ("@dtpoff", file);
14115 break;
14116 case UNSPEC_GOTNTPOFF:
14117 if (TARGET_64BIT)
14118 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14119 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14120 else
14121 fputs ("@gotntpoff", file);
14122 break;
14123 case UNSPEC_INDNTPOFF:
14124 fputs ("@indntpoff", file);
14125 break;
14126 #if TARGET_MACHO
14127 case UNSPEC_MACHOPIC_OFFSET:
14128 putc ('-', file);
14129 machopic_output_function_base_name (file);
14130 break;
14131 #endif
14132 default:
14133 output_operand_lossage ("invalid UNSPEC as operand");
14134 break;
14136 break;
14138 default:
14139 output_operand_lossage ("invalid expression as operand");
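
/* Examples of the text emitted above for a hypothetical symbol "foo":

     (const (unspec [foo] UNSPEC_GOTOFF))          foo@GOTOFF
     (const (unspec [foo] UNSPEC_GOTPCREL))        foo@GOTPCREL(%rip)
                                                   (AT&T dialect)
     (const (plus (unspec [foo] UNSPEC_DTPOFF)
                  (const_int 4)))                  4+foo@dtpoff  */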
14143 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14144 We need to emit DTP-relative relocations. */
14146 static void ATTRIBUTE_UNUSED
14147 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14149 fputs (ASM_LONG, file);
14150 output_addr_const (file, x);
14151 fputs ("@dtpoff", file);
14152 switch (size)
14154 case 4:
14155 break;
14156 case 8:
14157 fputs (", 0", file);
14158 break;
14159 default:
14160 gcc_unreachable ();
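
/* Example of the output above, assuming ASM_LONG is the usual
   "\t.long\t": for a hypothetical TLS variable "tlsvar" a 4-byte entry
   comes out as

        .long   tlsvar@dtpoff

   and an 8-byte entry as

        .long   tlsvar@dtpoff, 0

   i.e. the upper half of the 8-byte case is a literal zero.  */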
14164 /* Return true if X is a representation of the PIC register. This copes
14165 with calls from ix86_find_base_term, where the register might have
14166 been replaced by a cselib value. */
14168 static bool
14169 ix86_pic_register_p (rtx x)
14171 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14172 return (pic_offset_table_rtx
14173 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14174 else
14175 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14178 /* Helper function for ix86_delegitimize_address.
14179 Attempt to delegitimize TLS local-exec accesses. */
14181 static rtx
14182 ix86_delegitimize_tls_address (rtx orig_x)
14184 rtx x = orig_x, unspec;
14185 struct ix86_address addr;
14187 if (!TARGET_TLS_DIRECT_SEG_REFS)
14188 return orig_x;
14189 if (MEM_P (x))
14190 x = XEXP (x, 0);
14191 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14192 return orig_x;
14193 if (ix86_decompose_address (x, &addr) == 0
14194 || addr.seg != DEFAULT_TLS_SEG_REG
14195 || addr.disp == NULL_RTX
14196 || GET_CODE (addr.disp) != CONST)
14197 return orig_x;
14198 unspec = XEXP (addr.disp, 0);
14199 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14200 unspec = XEXP (unspec, 0);
14201 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14202 return orig_x;
14203 x = XVECEXP (unspec, 0, 0);
14204 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14205 if (unspec != XEXP (addr.disp, 0))
14206 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14207 if (addr.index)
14209 rtx idx = addr.index;
14210 if (addr.scale != 1)
14211 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14212 x = gen_rtx_PLUS (Pmode, idx, x);
14214 if (addr.base)
14215 x = gen_rtx_PLUS (Pmode, addr.base, x);
14216 if (MEM_P (orig_x))
14217 x = replace_equiv_address_nv (orig_x, x);
14218 return x;
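
/* Illustrative example of the delegitimization above, for a hypothetical
   TLS variable "foo" accessed via the local-exec model:

     (mem (plus (plus (reg R) (unspec [(const_int 0)] UNSPEC_TP))
                (const (unspec [foo] UNSPEC_NTPOFF))))

   decomposes with the TLS segment and an UNSPEC_NTPOFF displacement, and
   is rebuilt as (mem (plus (reg R) foo)), exposing the symbol again for
   debug output.  */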
14221 /* In the name of slightly smaller debug output, and to cater to
14222 general assembler lossage, recognize PIC+GOTOFF and turn it back
14223 into a direct symbol reference.
14225 On Darwin, this is necessary to avoid a crash, because Darwin
14226 has a different PIC label for each routine but the DWARF debugging
14227 information is not associated with any particular routine, so it's
14228 necessary to remove references to the PIC label from RTL stored by
14229 the DWARF output code. */
14231 static rtx
14232 ix86_delegitimize_address (rtx x)
14234 rtx orig_x = delegitimize_mem_from_attrs (x);
14235 /* addend is NULL or some rtx if x is something+GOTOFF where
14236 something doesn't include the PIC register. */
14237 rtx addend = NULL_RTX;
14238 /* reg_addend is NULL or a multiple of some register. */
14239 rtx reg_addend = NULL_RTX;
14240 /* const_addend is NULL or a const_int. */
14241 rtx const_addend = NULL_RTX;
14242 /* This is the result, or NULL. */
14243 rtx result = NULL_RTX;
14245 x = orig_x;
14247 if (MEM_P (x))
14248 x = XEXP (x, 0);
14250 if (TARGET_64BIT)
14252 if (GET_CODE (x) == CONST
14253 && GET_CODE (XEXP (x, 0)) == PLUS
14254 && GET_MODE (XEXP (x, 0)) == Pmode
14255 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14256 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14257 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14259 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14260 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14261 if (MEM_P (orig_x))
14262 x = replace_equiv_address_nv (orig_x, x);
14263 return x;
14266 if (GET_CODE (x) == CONST
14267 && GET_CODE (XEXP (x, 0)) == UNSPEC
14268 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14269 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14270 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14272 x = XVECEXP (XEXP (x, 0), 0, 0);
14273 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14275 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14276 GET_MODE (x), 0);
14277 if (x == NULL_RTX)
14278 return orig_x;
14280 return x;
14283 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14284 return ix86_delegitimize_tls_address (orig_x);
14286 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14287 and -mcmodel=medium -fpic. */
14290 if (GET_CODE (x) != PLUS
14291 || GET_CODE (XEXP (x, 1)) != CONST)
14292 return ix86_delegitimize_tls_address (orig_x);
14294 if (ix86_pic_register_p (XEXP (x, 0)))
14295 /* %ebx + GOT/GOTOFF */
14297 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14299 /* %ebx + %reg * scale + GOT/GOTOFF */
14300 reg_addend = XEXP (x, 0);
14301 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14302 reg_addend = XEXP (reg_addend, 1);
14303 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14304 reg_addend = XEXP (reg_addend, 0);
14305 else
14307 reg_addend = NULL_RTX;
14308 addend = XEXP (x, 0);
14311 else
14312 addend = XEXP (x, 0);
14314 x = XEXP (XEXP (x, 1), 0);
14315 if (GET_CODE (x) == PLUS
14316 && CONST_INT_P (XEXP (x, 1)))
14318 const_addend = XEXP (x, 1);
14319 x = XEXP (x, 0);
14322 if (GET_CODE (x) == UNSPEC
14323 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14324 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14325 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14326 && !MEM_P (orig_x) && !addend)))
14327 result = XVECEXP (x, 0, 0);
14329 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14330 && !MEM_P (orig_x))
14331 result = XVECEXP (x, 0, 0);
14333 if (! result)
14334 return ix86_delegitimize_tls_address (orig_x);
14336 if (const_addend)
14337 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14338 if (reg_addend)
14339 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14340 if (addend)
14342 /* If the rest of original X doesn't involve the PIC register, add
14343 addend and subtract pic_offset_table_rtx. This can happen e.g.
14344 for code like:
14345 leal (%ebx, %ecx, 4), %ecx
14347 movl foo@GOTOFF(%ecx), %edx
14348 in which case we return (%ecx - %ebx) + foo. */
14349 if (pic_offset_table_rtx)
14350 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14351 pic_offset_table_rtx),
14352 result);
14353 else
14354 return orig_x;
14356 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14358 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14359 if (result == NULL_RTX)
14360 return orig_x;
14362 return result;
14365 /* If X is a machine specific address (i.e. a symbol or label being
14366 referenced as a displacement from the GOT implemented using an
14367 UNSPEC), then return the base term. Otherwise return X. */
14370 ix86_find_base_term (rtx x)
14372 rtx term;
14374 if (TARGET_64BIT)
14376 if (GET_CODE (x) != CONST)
14377 return x;
14378 term = XEXP (x, 0);
14379 if (GET_CODE (term) == PLUS
14380 && (CONST_INT_P (XEXP (term, 1))
14381 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14382 term = XEXP (term, 0);
14383 if (GET_CODE (term) != UNSPEC
14384 || (XINT (term, 1) != UNSPEC_GOTPCREL
14385 && XINT (term, 1) != UNSPEC_PCREL))
14386 return x;
14388 return XVECEXP (term, 0, 0);
14391 return ix86_delegitimize_address (x);
14394 static void
14395 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14396 bool fp, FILE *file)
14398 const char *suffix;
14400 if (mode == CCFPmode || mode == CCFPUmode)
14402 code = ix86_fp_compare_code_to_integer (code);
14403 mode = CCmode;
14405 if (reverse)
14406 code = reverse_condition (code);
14408 switch (code)
14410 case EQ:
14411 switch (mode)
14413 case CCAmode:
14414 suffix = "a";
14415 break;
14417 case CCCmode:
14418 suffix = "c";
14419 break;
14421 case CCOmode:
14422 suffix = "o";
14423 break;
14425 case CCSmode:
14426 suffix = "s";
14427 break;
14429 default:
14430 suffix = "e";
14432 break;
14433 case NE:
14434 switch (mode)
14436 case CCAmode:
14437 suffix = "na";
14438 break;
14440 case CCCmode:
14441 suffix = "nc";
14442 break;
14444 case CCOmode:
14445 suffix = "no";
14446 break;
14448 case CCSmode:
14449 suffix = "ns";
14450 break;
14452 default:
14453 suffix = "ne";
14455 break;
14456 case GT:
14457 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14458 suffix = "g";
14459 break;
14460 case GTU:
14461 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14462 Those same assemblers have the same but opposite lossage on cmov. */
14463 if (mode == CCmode)
14464 suffix = fp ? "nbe" : "a";
14465 else
14466 gcc_unreachable ();
14467 break;
14468 case LT:
14469 switch (mode)
14471 case CCNOmode:
14472 case CCGOCmode:
14473 suffix = "s";
14474 break;
14476 case CCmode:
14477 case CCGCmode:
14478 suffix = "l";
14479 break;
14481 default:
14482 gcc_unreachable ();
14484 break;
14485 case LTU:
14486 if (mode == CCmode)
14487 suffix = "b";
14488 else if (mode == CCCmode)
14489 suffix = "c";
14490 else
14491 gcc_unreachable ();
14492 break;
14493 case GE:
14494 switch (mode)
14496 case CCNOmode:
14497 case CCGOCmode:
14498 suffix = "ns";
14499 break;
14501 case CCmode:
14502 case CCGCmode:
14503 suffix = "ge";
14504 break;
14506 default:
14507 gcc_unreachable ();
14509 break;
14510 case GEU:
14511 if (mode == CCmode)
14512 suffix = fp ? "nb" : "ae";
14513 else if (mode == CCCmode)
14514 suffix = "nc";
14515 else
14516 gcc_unreachable ();
14517 break;
14518 case LE:
14519 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14520 suffix = "le";
14521 break;
14522 case LEU:
14523 if (mode == CCmode)
14524 suffix = "be";
14525 else
14526 gcc_unreachable ();
14527 break;
14528 case UNORDERED:
14529 suffix = fp ? "u" : "p";
14530 break;
14531 case ORDERED:
14532 suffix = fp ? "nu" : "np";
14533 break;
14534 default:
14535 gcc_unreachable ();
14537 fputs (suffix, file);
14540 /* Print the name of register X to FILE based on its machine mode and number.
14541 If CODE is 'w', pretend the mode is HImode.
14542 If CODE is 'b', pretend the mode is QImode.
14543 If CODE is 'k', pretend the mode is SImode.
14544 If CODE is 'q', pretend the mode is DImode.
14545 If CODE is 'x', pretend the mode is V4SFmode.
14546 If CODE is 't', pretend the mode is V8SFmode.
14547 If CODE is 'g', pretend the mode is V16SFmode.
14548 If CODE is 'h', pretend the reg is the 'high' byte register.
14549 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14550 If CODE is 'd', duplicate the operand for AVX instruction.
14553 void
14554 print_reg (rtx x, int code, FILE *file)
14556 const char *reg;
14557 unsigned int regno;
14558 bool duplicated = code == 'd' && TARGET_AVX;
14560 if (ASSEMBLER_DIALECT == ASM_ATT)
14561 putc ('%', file);
14563 if (x == pc_rtx)
14565 gcc_assert (TARGET_64BIT);
14566 fputs ("rip", file);
14567 return;
14570 regno = true_regnum (x);
14571 gcc_assert (regno != ARG_POINTER_REGNUM
14572 && regno != FRAME_POINTER_REGNUM
14573 && regno != FLAGS_REG
14574 && regno != FPSR_REG
14575 && regno != FPCR_REG);
14577 if (code == 'w' || MMX_REG_P (x))
14578 code = 2;
14579 else if (code == 'b')
14580 code = 1;
14581 else if (code == 'k')
14582 code = 4;
14583 else if (code == 'q')
14584 code = 8;
14585 else if (code == 'y')
14586 code = 3;
14587 else if (code == 'h')
14588 code = 0;
14589 else if (code == 'x')
14590 code = 16;
14591 else if (code == 't')
14592 code = 32;
14593 else if (code == 'g')
14594 code = 64;
14595 else
14596 code = GET_MODE_SIZE (GET_MODE (x));
14598 /* Irritatingly, the AMD extended registers use a different naming convention
14599 from the normal registers: "r%d[bwd]" */
14600 if (REX_INT_REGNO_P (regno))
14602 gcc_assert (TARGET_64BIT);
14603 putc ('r', file);
14604 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14605 switch (code)
14607 case 0:
14608 error ("extended registers have no high halves");
14609 break;
14610 case 1:
14611 putc ('b', file);
14612 break;
14613 case 2:
14614 putc ('w', file);
14615 break;
14616 case 4:
14617 putc ('d', file);
14618 break;
14619 case 8:
14620 /* no suffix */
14621 break;
14622 default:
14623 error ("unsupported operand size for extended register");
14624 break;
14626 return;
14629 reg = NULL;
14630 switch (code)
14632 case 3:
14633 if (STACK_TOP_P (x))
14635 reg = "st(0)";
14636 break;
14638 /* FALLTHRU */
14639 case 8:
14640 case 4:
14641 case 12:
14642 if (! ANY_FP_REG_P (x))
14643 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14644 /* FALLTHRU */
14645 case 16:
14646 case 2:
14647 normal:
14648 reg = hi_reg_name[regno];
14649 break;
14650 case 1:
14651 if (regno >= ARRAY_SIZE (qi_reg_name))
14652 goto normal;
14653 reg = qi_reg_name[regno];
14654 break;
14655 case 0:
14656 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14657 goto normal;
14658 reg = qi_high_reg_name[regno];
14659 break;
14660 case 32:
14661 if (SSE_REG_P (x))
14663 gcc_assert (!duplicated);
14664 putc ('y', file);
14665 fputs (hi_reg_name[regno] + 1, file);
14666 return;
14668 case 64:
14669 if (SSE_REG_P (x))
14671 gcc_assert (!duplicated);
14672 putc ('z', file);
14673 fputs (hi_reg_name[REGNO (x)] + 1, file);
14674 return;
14676 break;
14677 default:
14678 gcc_unreachable ();
14681 fputs (reg, file);
14682 if (duplicated)
14684 if (ASSEMBLER_DIALECT == ASM_ATT)
14685 fprintf (file, ", %%%s", reg);
14686 else
14687 fprintf (file, ", %s", reg);
14691 /* Locate some local-dynamic symbol still in use by this function
14692 so that we can print its name in some tls_local_dynamic_base
14693 pattern. */
14695 static int
14696 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14698 rtx x = *px;
14700 if (GET_CODE (x) == SYMBOL_REF
14701 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14703 cfun->machine->some_ld_name = XSTR (x, 0);
14704 return 1;
14707 return 0;
14710 static const char *
14711 get_some_local_dynamic_name (void)
14713 rtx insn;
14715 if (cfun->machine->some_ld_name)
14716 return cfun->machine->some_ld_name;
14718 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14719 if (NONDEBUG_INSN_P (insn)
14720 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14721 return cfun->machine->some_ld_name;
14723 return NULL;
14726 /* Meaning of CODE:
14727 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14728 C -- print opcode suffix for set/cmov insn.
14729 c -- like C, but print reversed condition
14730 F,f -- likewise, but for floating-point.
14731 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14732 otherwise nothing
14733 R -- print embedded rounding and sae.
14734 r -- print only sae.
14735 z -- print the opcode suffix for the size of the current operand.
14736 Z -- likewise, with special suffixes for x87 instructions.
14737 * -- print a star (in certain assembler syntax)
14738 A -- print an absolute memory reference.
14739 E -- print address with DImode register names if TARGET_64BIT.
14740 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14741 s -- print a shift double count, followed by the assembler's argument
14742 delimiter.
14743 b -- print the QImode name of the register for the indicated operand.
14744 %b0 would print %al if operands[0] is reg 0.
14745 w -- likewise, print the HImode name of the register.
14746 k -- likewise, print the SImode name of the register.
14747 q -- likewise, print the DImode name of the register.
14748 x -- likewise, print the V4SFmode name of the register.
14749 t -- likewise, print the V8SFmode name of the register.
14750 g -- likewise, print the V16SFmode name of the register.
14751 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14752 y -- print "st(0)" instead of "st" as a register.
14753 d -- print duplicated register operand for AVX instruction.
14754 D -- print condition for SSE cmp instruction.
14755 P -- if PIC, print an @PLT suffix.
14756 p -- print raw symbol name.
14757 X -- don't print any sort of PIC '@' suffix for a symbol.
14758 & -- print some in-use local-dynamic symbol name.
14759 H -- print a memory address offset by 8; used for sse high-parts
14760 Y -- print condition for XOP pcom* instruction.
14761 + -- print a branch hint as 'cs' or 'ds' prefix
14762 ; -- print a semicolon (after prefixes due to bug in older gas).
14763 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14764 @ -- print a segment register of thread base pointer load
14765 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14768 void
14769 ix86_print_operand (FILE *file, rtx x, int code)
14771 if (code)
14773 switch (code)
14775 case 'A':
14776 switch (ASSEMBLER_DIALECT)
14778 case ASM_ATT:
14779 putc ('*', file);
14780 break;
14782 case ASM_INTEL:
14783 /* Intel syntax. For absolute addresses, registers should not
14784 be surrounded by brackets. */
14785 if (!REG_P (x))
14787 putc ('[', file);
14788 ix86_print_operand (file, x, 0);
14789 putc (']', file);
14790 return;
14792 break;
14794 default:
14795 gcc_unreachable ();
14798 ix86_print_operand (file, x, 0);
14799 return;
14801 case 'E':
14802 /* Wrap address in an UNSPEC to declare special handling. */
14803 if (TARGET_64BIT)
14804 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14806 output_address (x);
14807 return;
14809 case 'L':
14810 if (ASSEMBLER_DIALECT == ASM_ATT)
14811 putc ('l', file);
14812 return;
14814 case 'W':
14815 if (ASSEMBLER_DIALECT == ASM_ATT)
14816 putc ('w', file);
14817 return;
14819 case 'B':
14820 if (ASSEMBLER_DIALECT == ASM_ATT)
14821 putc ('b', file);
14822 return;
14824 case 'Q':
14825 if (ASSEMBLER_DIALECT == ASM_ATT)
14826 putc ('l', file);
14827 return;
14829 case 'S':
14830 if (ASSEMBLER_DIALECT == ASM_ATT)
14831 putc ('s', file);
14832 return;
14834 case 'T':
14835 if (ASSEMBLER_DIALECT == ASM_ATT)
14836 putc ('t', file);
14837 return;
14839 case 'O':
14840 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14841 if (ASSEMBLER_DIALECT != ASM_ATT)
14842 return;
14844 switch (GET_MODE_SIZE (GET_MODE (x)))
14846 case 2:
14847 putc ('w', file);
14848 break;
14850 case 4:
14851 putc ('l', file);
14852 break;
14854 case 8:
14855 putc ('q', file);
14856 break;
14858 default:
14859 output_operand_lossage
14860 ("invalid operand size for operand code 'O'");
14861 return;
14864 putc ('.', file);
14865 #endif
14866 return;
14868 case 'z':
14869 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14871 /* Opcodes don't get size suffixes if using Intel syntax. */
14872 if (ASSEMBLER_DIALECT == ASM_INTEL)
14873 return;
14875 switch (GET_MODE_SIZE (GET_MODE (x)))
14877 case 1:
14878 putc ('b', file);
14879 return;
14881 case 2:
14882 putc ('w', file);
14883 return;
14885 case 4:
14886 putc ('l', file);
14887 return;
14889 case 8:
14890 putc ('q', file);
14891 return;
14893 default:
14894 output_operand_lossage
14895 ("invalid operand size for operand code 'z'");
14896 return;
14900 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14901 warning
14902 (0, "non-integer operand used with operand code 'z'");
14903 /* FALLTHRU */
14905 case 'Z':
14906 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14907 if (ASSEMBLER_DIALECT == ASM_INTEL)
14908 return;
14910 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14912 switch (GET_MODE_SIZE (GET_MODE (x)))
14914 case 2:
14915 #ifdef HAVE_AS_IX86_FILDS
14916 putc ('s', file);
14917 #endif
14918 return;
14920 case 4:
14921 putc ('l', file);
14922 return;
14924 case 8:
14925 #ifdef HAVE_AS_IX86_FILDQ
14926 putc ('q', file);
14927 #else
14928 fputs ("ll", file);
14929 #endif
14930 return;
14932 default:
14933 break;
14936 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14938 /* 387 opcodes don't get size suffixes
14939 if the operands are registers. */
14940 if (STACK_REG_P (x))
14941 return;
14943 switch (GET_MODE_SIZE (GET_MODE (x)))
14945 case 4:
14946 putc ('s', file);
14947 return;
14949 case 8:
14950 putc ('l', file);
14951 return;
14953 case 12:
14954 case 16:
14955 putc ('t', file);
14956 return;
14958 default:
14959 break;
14962 else
14964 output_operand_lossage
14965 ("invalid operand type used with operand code 'Z'");
14966 return;
14969 output_operand_lossage
14970 ("invalid operand size for operand code 'Z'");
14971 return;
14973 case 'd':
14974 case 'b':
14975 case 'w':
14976 case 'k':
14977 case 'q':
14978 case 'h':
14979 case 't':
14980 case 'g':
14981 case 'y':
14982 case 'x':
14983 case 'X':
14984 case 'P':
14985 case 'p':
14986 break;
14988 case 's':
14989 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14991 ix86_print_operand (file, x, 0);
14992 fputs (", ", file);
14994 return;
14996 case 'Y':
14997 switch (GET_CODE (x))
14999 case NE:
15000 fputs ("neq", file);
15001 break;
15002 case EQ:
15003 fputs ("eq", file);
15004 break;
15005 case GE:
15006 case GEU:
15007 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15008 break;
15009 case GT:
15010 case GTU:
15011 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15012 break;
15013 case LE:
15014 case LEU:
15015 fputs ("le", file);
15016 break;
15017 case LT:
15018 case LTU:
15019 fputs ("lt", file);
15020 break;
15021 case UNORDERED:
15022 fputs ("unord", file);
15023 break;
15024 case ORDERED:
15025 fputs ("ord", file);
15026 break;
15027 case UNEQ:
15028 fputs ("ueq", file);
15029 break;
15030 case UNGE:
15031 fputs ("nlt", file);
15032 break;
15033 case UNGT:
15034 fputs ("nle", file);
15035 break;
15036 case UNLE:
15037 fputs ("ule", file);
15038 break;
15039 case UNLT:
15040 fputs ("ult", file);
15041 break;
15042 case LTGT:
15043 fputs ("une", file);
15044 break;
15045 default:
15046 output_operand_lossage ("operand is not a condition code, "
15047 "invalid operand code 'Y'");
15048 return;
15050 return;
15052 case 'D':
15053 /* A little bit of brain damage here: the SSE compare instructions
15054 use completely different names for the comparisons than the
15055 fp conditional moves do. */
15056 switch (GET_CODE (x))
15058 case UNEQ:
15059 if (TARGET_AVX)
15061 fputs ("eq_us", file);
15062 break;
15064 case EQ:
15065 fputs ("eq", file);
15066 break;
15067 case UNLT:
15068 if (TARGET_AVX)
15070 fputs ("nge", file);
15071 break;
15073 case LT:
15074 fputs ("lt", file);
15075 break;
15076 case UNLE:
15077 if (TARGET_AVX)
15079 fputs ("ngt", file);
15080 break;
15082 case LE:
15083 fputs ("le", file);
15084 break;
15085 case UNORDERED:
15086 fputs ("unord", file);
15087 break;
15088 case LTGT:
15089 if (TARGET_AVX)
15091 fputs ("neq_oq", file);
15092 break;
15094 case NE:
15095 fputs ("neq", file);
15096 break;
15097 case GE:
15098 if (TARGET_AVX)
15100 fputs ("ge", file);
15101 break;
15103 case UNGE:
15104 fputs ("nlt", file);
15105 break;
15106 case GT:
15107 if (TARGET_AVX)
15109 fputs ("gt", file);
15110 break;
15112 case UNGT:
15113 fputs ("nle", file);
15114 break;
15115 case ORDERED:
15116 fputs ("ord", file);
15117 break;
15118 default:
15119 output_operand_lossage ("operand is not a condition code, "
15120 "invalid operand code 'D'");
15121 return;
15123 return;
15125 case 'F':
15126 case 'f':
15127 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15128 if (ASSEMBLER_DIALECT == ASM_ATT)
15129 putc ('.', file);
15130 #endif
15132 case 'C':
15133 case 'c':
15134 if (!COMPARISON_P (x))
15136 output_operand_lossage ("operand is not a condition code, "
15137 "invalid operand code '%c'", code);
15138 return;
15140 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15141 code == 'c' || code == 'f',
15142 code == 'F' || code == 'f',
15143 file);
15144 return;
15146 case 'H':
15147 if (!offsettable_memref_p (x))
15149 output_operand_lossage ("operand is not an offsettable memory "
15150 "reference, invalid operand code 'H'");
15151 return;
15153 /* It doesn't actually matter what mode we use here, as we're
15154 only going to use this for printing. */
15155 x = adjust_address_nv (x, DImode, 8);
15156 /* Output 'qword ptr' for intel assembler dialect. */
15157 if (ASSEMBLER_DIALECT == ASM_INTEL)
15158 code = 'q';
15159 break;
15161 case 'K':
15162 gcc_assert (CONST_INT_P (x));
15164 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15165 #ifdef HAVE_AS_IX86_HLE
15166 fputs ("xacquire ", file);
15167 #else
15168 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15169 #endif
15170 else if (INTVAL (x) & IX86_HLE_RELEASE)
15171 #ifdef HAVE_AS_IX86_HLE
15172 fputs ("xrelease ", file);
15173 #else
15174 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15175 #endif
15176 /* We do not want to print the value of the operand. */
15177 return;
15179 case 'N':
15180 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15181 fputs ("{z}", file);
15182 return;
15184 case 'r':
15185 gcc_assert (CONST_INT_P (x));
15186 gcc_assert (INTVAL (x) == ROUND_SAE);
15188 if (ASSEMBLER_DIALECT == ASM_INTEL)
15189 fputs (", ", file);
15191 fputs ("{sae}", file);
15193 if (ASSEMBLER_DIALECT == ASM_ATT)
15194 fputs (", ", file);
15196 return;
15198 case 'R':
15199 gcc_assert (CONST_INT_P (x));
15201 if (ASSEMBLER_DIALECT == ASM_INTEL)
15202 fputs (", ", file);
15204 switch (INTVAL (x))
15206 case ROUND_NEAREST_INT | ROUND_SAE:
15207 fputs ("{rn-sae}", file);
15208 break;
15209 case ROUND_NEG_INF | ROUND_SAE:
15210 fputs ("{rd-sae}", file);
15211 break;
15212 case ROUND_POS_INF | ROUND_SAE:
15213 fputs ("{ru-sae}", file);
15214 break;
15215 case ROUND_ZERO | ROUND_SAE:
15216 fputs ("{rz-sae}", file);
15217 break;
15218 default:
15219 gcc_unreachable ();
15222 if (ASSEMBLER_DIALECT == ASM_ATT)
15223 fputs (", ", file);
15225 return;
15227 case '*':
15228 if (ASSEMBLER_DIALECT == ASM_ATT)
15229 putc ('*', file);
15230 return;
15232 case '&':
15234 const char *name = get_some_local_dynamic_name ();
15235 if (name == NULL)
15236 output_operand_lossage ("'%%&' used without any "
15237 "local dynamic TLS references");
15238 else
15239 assemble_name (file, name);
15240 return;
15243 case '+':
15245 rtx x;
15247 if (!optimize
15248 || optimize_function_for_size_p (cfun)
15249 || !TARGET_BRANCH_PREDICTION_HINTS)
15250 return;
15252 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15253 if (x)
15255 int pred_val = XINT (x, 0);
15257 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15258 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15260 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15261 bool cputaken
15262 = final_forward_branch_p (current_output_insn) == 0;
15264 /* Emit hints only in the case where the default branch prediction
15265 heuristics would fail. */
15266 if (taken != cputaken)
15268 /* We use 3e (DS) prefix for taken branches and
15269 2e (CS) prefix for not taken branches. */
15270 if (taken)
15271 fputs ("ds ; ", file);
15272 else
15273 fputs ("cs ; ", file);
15277 return;
15280 case ';':
15281 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15282 putc (';', file);
15283 #endif
15284 return;
15286 case '@':
15287 if (ASSEMBLER_DIALECT == ASM_ATT)
15288 putc ('%', file);
15290 /* The kernel uses a different segment register for performance
15291 reasons; this way a system call does not have to trash the userspace
15292 segment register, which would be expensive. */
15293 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15294 fputs ("fs", file);
15295 else
15296 fputs ("gs", file);
15297 return;
15299 case '~':
15300 putc (TARGET_AVX2 ? 'i' : 'f', file);
15301 return;
15303 case '^':
15304 if (TARGET_64BIT && Pmode != word_mode)
15305 fputs ("addr32 ", file);
15306 return;
15308 default:
15309 output_operand_lossage ("invalid operand code '%c'", code);
15313 if (REG_P (x))
15314 print_reg (x, code, file);
15316 else if (MEM_P (x))
15318 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15319 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15320 && GET_MODE (x) != BLKmode)
15322 const char * size;
15323 switch (GET_MODE_SIZE (GET_MODE (x)))
15325 case 1: size = "BYTE"; break;
15326 case 2: size = "WORD"; break;
15327 case 4: size = "DWORD"; break;
15328 case 8: size = "QWORD"; break;
15329 case 12: size = "TBYTE"; break;
15330 case 16:
15331 if (GET_MODE (x) == XFmode)
15332 size = "TBYTE";
15333 else
15334 size = "XMMWORD";
15335 break;
15336 case 32: size = "YMMWORD"; break;
15337 case 64: size = "ZMMWORD"; break;
15338 default:
15339 gcc_unreachable ();
15342 /* Check for explicit size override (codes 'b', 'w', 'k',
15343 'q' and 'x') */
15344 if (code == 'b')
15345 size = "BYTE";
15346 else if (code == 'w')
15347 size = "WORD";
15348 else if (code == 'k')
15349 size = "DWORD";
15350 else if (code == 'q')
15351 size = "QWORD";
15352 else if (code == 'x')
15353 size = "XMMWORD";
15355 fputs (size, file);
15356 fputs (" PTR ", file);
15359 x = XEXP (x, 0);
15360 /* Avoid (%rip) for call operands. */
15361 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15362 && !CONST_INT_P (x))
15363 output_addr_const (file, x);
15364 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15365 output_operand_lossage ("invalid constraints for operand");
15366 else
15367 output_address (x);
15370 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15372 REAL_VALUE_TYPE r;
15373 long l;
15375 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15376 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15378 if (ASSEMBLER_DIALECT == ASM_ATT)
15379 putc ('$', file);
15380 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15381 if (code == 'q')
15382 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15383 (unsigned long long) (int) l);
15384 else
15385 fprintf (file, "0x%08x", (unsigned int) l);
15388 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15390 REAL_VALUE_TYPE r;
15391 long l[2];
15393 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15394 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15396 if (ASSEMBLER_DIALECT == ASM_ATT)
15397 putc ('$', file);
15398 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15401 /* These float cases don't actually occur as immediate operands. */
15402 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15404 char dstr[30];
15406 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15407 fputs (dstr, file);
15410 else
15412 /* We have patterns that allow zero sets of memory, for instance.
15413 In 64-bit mode, we should probably support all 8-byte vectors,
15414 since we can in fact encode that into an immediate. */
15415 if (GET_CODE (x) == CONST_VECTOR)
15417 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15418 x = const0_rtx;
15421 if (code != 'P' && code != 'p')
15423 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15425 if (ASSEMBLER_DIALECT == ASM_ATT)
15426 putc ('$', file);
15428 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15429 || GET_CODE (x) == LABEL_REF)
15431 if (ASSEMBLER_DIALECT == ASM_ATT)
15432 putc ('$', file);
15433 else
15434 fputs ("OFFSET FLAT:", file);
15437 if (CONST_INT_P (x))
15438 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15439 else if (flag_pic || MACHOPIC_INDIRECT)
15440 output_pic_addr_const (file, x, code);
15441 else
15442 output_addr_const (file, x);
15446 static bool
15447 ix86_print_operand_punct_valid_p (unsigned char code)
15449 return (code == '@' || code == '*' || code == '+' || code == '&'
15450 || code == ';' || code == '~' || code == '^');
15453 /* Print a memory operand whose address is ADDR. */
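/* Illustrative output only, assuming a simple base+index+disp address:
   the AT&T dialect prints  -4(%ebp,%eax,4)  while the Intel dialect
   prints  [ebp-4+eax*4];  the VSIB and addr32 cases handled below adjust
   the register names and the scale printing accordingly.  */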
15455 static void
15456 ix86_print_operand_address (FILE *file, rtx addr)
15458 struct ix86_address parts;
15459 rtx base, index, disp;
15460 int scale;
15461 int ok;
15462 bool vsib = false;
15463 int code = 0;
15465 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15467 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15468 gcc_assert (parts.index == NULL_RTX);
15469 parts.index = XVECEXP (addr, 0, 1);
15470 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15471 addr = XVECEXP (addr, 0, 0);
15472 vsib = true;
15474 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15476 gcc_assert (TARGET_64BIT);
15477 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15478 code = 'q';
15480 else
15481 ok = ix86_decompose_address (addr, &parts);
15483 gcc_assert (ok);
15485 base = parts.base;
15486 index = parts.index;
15487 disp = parts.disp;
15488 scale = parts.scale;
15490 switch (parts.seg)
15492 case SEG_DEFAULT:
15493 break;
15494 case SEG_FS:
15495 case SEG_GS:
15496 if (ASSEMBLER_DIALECT == ASM_ATT)
15497 putc ('%', file);
15498 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15499 break;
15500 default:
15501 gcc_unreachable ();
15504 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15505 if (TARGET_64BIT && !base && !index)
15507 rtx symbol = disp;
15509 if (GET_CODE (disp) == CONST
15510 && GET_CODE (XEXP (disp, 0)) == PLUS
15511 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15512 symbol = XEXP (XEXP (disp, 0), 0);
15514 if (GET_CODE (symbol) == LABEL_REF
15515 || (GET_CODE (symbol) == SYMBOL_REF
15516 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15517 base = pc_rtx;
15519 if (!base && !index)
15521 /* Displacement only requires special attention. */
15523 if (CONST_INT_P (disp))
15525 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15526 fputs ("ds:", file);
15527 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15529 else if (flag_pic)
15530 output_pic_addr_const (file, disp, 0);
15531 else
15532 output_addr_const (file, disp);
15534 else
15536 /* Print SImode register names to force addr32 prefix. */
15537 if (SImode_address_operand (addr, VOIDmode))
15539 #ifdef ENABLE_CHECKING
15540 gcc_assert (TARGET_64BIT);
15541 switch (GET_CODE (addr))
15543 case SUBREG:
15544 gcc_assert (GET_MODE (addr) == SImode);
15545 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15546 break;
15547 case ZERO_EXTEND:
15548 case AND:
15549 gcc_assert (GET_MODE (addr) == DImode);
15550 break;
15551 default:
15552 gcc_unreachable ();
15554 #endif
15555 gcc_assert (!code);
15556 code = 'k';
15558 else if (code == 0
15559 && TARGET_X32
15560 && disp
15561 && CONST_INT_P (disp)
15562 && INTVAL (disp) < -16*1024*1024)
15564 /* X32 runs in 64-bit mode, where displacement, DISP, in
15565 address DISP(%r64), is encoded as 32-bit immediate sign-
15566 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15567 address is %r64 + 0xffffffffbffffd00. When %r64 <
15568 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15569 which is invalid for x32. The correct address is %r64
15570 - 0x40000300 == 0xf7ffdd64. To properly encode
15571 -0x40000300(%r64) for x32, we zero-extend negative
15572 displacement by forcing addr32 prefix which truncates
15573 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15574 zero-extend all negative displacements, including -1(%rsp).
15575 However, for small negative displacements, sign-extension
15576 won't cause overflow. We only zero-extend negative
15577 displacements if they < -16*1024*1024, which is also used
15578 to check legitimate address displacements for PIC. */
15579 code = 'k';
15582 if (ASSEMBLER_DIALECT == ASM_ATT)
15584 if (disp)
15586 if (flag_pic)
15587 output_pic_addr_const (file, disp, 0);
15588 else if (GET_CODE (disp) == LABEL_REF)
15589 output_asm_label (disp);
15590 else
15591 output_addr_const (file, disp);
15594 putc ('(', file);
15595 if (base)
15596 print_reg (base, code, file);
15597 if (index)
15599 putc (',', file);
15600 print_reg (index, vsib ? 0 : code, file);
15601 if (scale != 1 || vsib)
15602 fprintf (file, ",%d", scale);
15604 putc (')', file);
15606 else
15608 rtx offset = NULL_RTX;
15610 if (disp)
15612 /* Pull out the offset of a symbol; print any symbol itself. */
15613 if (GET_CODE (disp) == CONST
15614 && GET_CODE (XEXP (disp, 0)) == PLUS
15615 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15617 offset = XEXP (XEXP (disp, 0), 1);
15618 disp = gen_rtx_CONST (VOIDmode,
15619 XEXP (XEXP (disp, 0), 0));
15622 if (flag_pic)
15623 output_pic_addr_const (file, disp, 0);
15624 else if (GET_CODE (disp) == LABEL_REF)
15625 output_asm_label (disp);
15626 else if (CONST_INT_P (disp))
15627 offset = disp;
15628 else
15629 output_addr_const (file, disp);
15632 putc ('[', file);
15633 if (base)
15635 print_reg (base, code, file);
15636 if (offset)
15638 if (INTVAL (offset) >= 0)
15639 putc ('+', file);
15640 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15643 else if (offset)
15644 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15645 else
15646 putc ('0', file);
15648 if (index)
15650 putc ('+', file);
15651 print_reg (index, vsib ? 0 : code, file);
15652 if (scale != 1 || vsib)
15653 fprintf (file, "*%d", scale);
15655 putc (']', file);
15660 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15662 static bool
15663 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15665 rtx op;
15667 if (GET_CODE (x) != UNSPEC)
15668 return false;
15670 op = XVECEXP (x, 0, 0);
15671 switch (XINT (x, 1))
15673 case UNSPEC_GOTTPOFF:
15674 output_addr_const (file, op);
15675 /* FIXME: This might be @TPOFF in Sun ld. */
15676 fputs ("@gottpoff", file);
15677 break;
15678 case UNSPEC_TPOFF:
15679 output_addr_const (file, op);
15680 fputs ("@tpoff", file);
15681 break;
15682 case UNSPEC_NTPOFF:
15683 output_addr_const (file, op);
15684 if (TARGET_64BIT)
15685 fputs ("@tpoff", file);
15686 else
15687 fputs ("@ntpoff", file);
15688 break;
15689 case UNSPEC_DTPOFF:
15690 output_addr_const (file, op);
15691 fputs ("@dtpoff", file);
15692 break;
15693 case UNSPEC_GOTNTPOFF:
15694 output_addr_const (file, op);
15695 if (TARGET_64BIT)
15696 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15697 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15698 else
15699 fputs ("@gotntpoff", file);
15700 break;
15701 case UNSPEC_INDNTPOFF:
15702 output_addr_const (file, op);
15703 fputs ("@indntpoff", file);
15704 break;
15705 #if TARGET_MACHO
15706 case UNSPEC_MACHOPIC_OFFSET:
15707 output_addr_const (file, op);
15708 putc ('-', file);
15709 machopic_output_function_base_name (file);
15710 break;
15711 #endif
15713 case UNSPEC_STACK_CHECK:
15715 int offset;
15717 gcc_assert (flag_split_stack);
15719 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15720 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15721 #else
15722 gcc_unreachable ();
15723 #endif
15725 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15727 break;
15729 default:
15730 return false;
15733 return true;
15736 /* Split one or more double-mode RTL references into pairs of half-mode
15737 references. The RTL can be REG, offsettable MEM, integer constant, or
15738 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15739 split and "num" is its length. lo_half and hi_half are output arrays
15740 that parallel "operands". */
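/* Minimal sketch of the intended effect (little-endian layout assumed):
   a DImode value is split into its low SImode word at byte offset 0 and
   its high word at byte offset 4; for an offsettable MEM the two halves
   are the original address and the address plus 4.  */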
15742 void
15743 split_double_mode (enum machine_mode mode, rtx operands[],
15744 int num, rtx lo_half[], rtx hi_half[])
15746 enum machine_mode half_mode;
15747 unsigned int byte;
15749 switch (mode)
15751 case TImode:
15752 half_mode = DImode;
15753 break;
15754 case DImode:
15755 half_mode = SImode;
15756 break;
15757 default:
15758 gcc_unreachable ();
15761 byte = GET_MODE_SIZE (half_mode);
15763 while (num--)
15765 rtx op = operands[num];
15767 /* simplify_subreg refuses to split volatile memory addresses,
15768 but we still have to handle them. */
15769 if (MEM_P (op))
15771 lo_half[num] = adjust_address (op, half_mode, 0);
15772 hi_half[num] = adjust_address (op, half_mode, byte);
15774 else
15776 lo_half[num] = simplify_gen_subreg (half_mode, op,
15777 GET_MODE (op) == VOIDmode
15778 ? mode : GET_MODE (op), 0);
15779 hi_half[num] = simplify_gen_subreg (half_mode, op,
15780 GET_MODE (op) == VOIDmode
15781 ? mode : GET_MODE (op), byte);
15786 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15787 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15788 is the expression of the binary operation. The output may either be
15789 emitted here, or returned to the caller, like all output_* functions.
15791 There is no guarantee that the operands are the same mode, as they
15792 might be within FLOAT or FLOAT_EXTEND expressions. */
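/* Hedged examples of the templates assembled below: with AVX the SSE path
   produces e.g. "vaddsd\t{%2, %1, %0|%0, %1, %2}", without AVX the
   two-operand "addsd\t{%2, %0|%0, %2}", while the x87 path appends the
   register/popping suffix chosen by the switch below to "fadd", "fsub",
   "fmul" or "fdiv".  */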
15794 #ifndef SYSV386_COMPAT
15795 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15796 wants to fix the assemblers because that causes incompatibility
15797 with gcc. No-one wants to fix gcc because that causes
15798 incompatibility with assemblers... You can use the option of
15799 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15800 #define SYSV386_COMPAT 1
15801 #endif
15803 const char *
15804 output_387_binary_op (rtx insn, rtx *operands)
15806 static char buf[40];
15807 const char *p;
15808 const char *ssep;
15809 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15811 #ifdef ENABLE_CHECKING
15812 /* Even if we do not want to check the inputs, this documents the input
15813 constraints, which helps in understanding the following code. */
15814 if (STACK_REG_P (operands[0])
15815 && ((REG_P (operands[1])
15816 && REGNO (operands[0]) == REGNO (operands[1])
15817 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15818 || (REG_P (operands[2])
15819 && REGNO (operands[0]) == REGNO (operands[2])
15820 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15821 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15822 ; /* ok */
15823 else
15824 gcc_assert (is_sse);
15825 #endif
15827 switch (GET_CODE (operands[3]))
15829 case PLUS:
15830 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15831 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15832 p = "fiadd";
15833 else
15834 p = "fadd";
15835 ssep = "vadd";
15836 break;
15838 case MINUS:
15839 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15840 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15841 p = "fisub";
15842 else
15843 p = "fsub";
15844 ssep = "vsub";
15845 break;
15847 case MULT:
15848 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15849 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15850 p = "fimul";
15851 else
15852 p = "fmul";
15853 ssep = "vmul";
15854 break;
15856 case DIV:
15857 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15858 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15859 p = "fidiv";
15860 else
15861 p = "fdiv";
15862 ssep = "vdiv";
15863 break;
15865 default:
15866 gcc_unreachable ();
15869 if (is_sse)
15871 if (TARGET_AVX)
15873 strcpy (buf, ssep);
15874 if (GET_MODE (operands[0]) == SFmode)
15875 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15876 else
15877 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15879 else
15881 strcpy (buf, ssep + 1);
15882 if (GET_MODE (operands[0]) == SFmode)
15883 strcat (buf, "ss\t{%2, %0|%0, %2}");
15884 else
15885 strcat (buf, "sd\t{%2, %0|%0, %2}");
15887 return buf;
15889 strcpy (buf, p);
15891 switch (GET_CODE (operands[3]))
15893 case MULT:
15894 case PLUS:
15895 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15897 rtx temp = operands[2];
15898 operands[2] = operands[1];
15899 operands[1] = temp;
15902 /* We know operands[0] == operands[1]. */
15904 if (MEM_P (operands[2]))
15906 p = "%Z2\t%2";
15907 break;
15910 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15912 if (STACK_TOP_P (operands[0]))
15913 /* How is it that we are storing to a dead operand[2]?
15914 Well, presumably operands[1] is dead too. We can't
15915 store the result to st(0) as st(0) gets popped on this
15916 instruction. Instead store to operands[2] (which I
15917 think has to be st(1)). st(1) will be popped later.
15918 gcc <= 2.8.1 didn't have this check and generated
15919 assembly code that the Unixware assembler rejected. */
15920 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15921 else
15922 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15923 break;
15926 if (STACK_TOP_P (operands[0]))
15927 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15928 else
15929 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15930 break;
15932 case MINUS:
15933 case DIV:
15934 if (MEM_P (operands[1]))
15936 p = "r%Z1\t%1";
15937 break;
15940 if (MEM_P (operands[2]))
15942 p = "%Z2\t%2";
15943 break;
15946 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15948 #if SYSV386_COMPAT
15949 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15950 derived assemblers, confusingly reverse the direction of
15951 the operation for fsub{r} and fdiv{r} when the
15952 destination register is not st(0). The Intel assembler
15953 doesn't have this brain damage. Read !SYSV386_COMPAT to
15954 figure out what the hardware really does. */
15955 if (STACK_TOP_P (operands[0]))
15956 p = "{p\t%0, %2|rp\t%2, %0}";
15957 else
15958 p = "{rp\t%2, %0|p\t%0, %2}";
15959 #else
15960 if (STACK_TOP_P (operands[0]))
15961 /* As above for fmul/fadd, we can't store to st(0). */
15962 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15963 else
15964 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15965 #endif
15966 break;
15969 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15971 #if SYSV386_COMPAT
15972 if (STACK_TOP_P (operands[0]))
15973 p = "{rp\t%0, %1|p\t%1, %0}";
15974 else
15975 p = "{p\t%1, %0|rp\t%0, %1}";
15976 #else
15977 if (STACK_TOP_P (operands[0]))
15978 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15979 else
15980 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15981 #endif
15982 break;
15985 if (STACK_TOP_P (operands[0]))
15987 if (STACK_TOP_P (operands[1]))
15988 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15989 else
15990 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15991 break;
15993 else if (STACK_TOP_P (operands[1]))
15995 #if SYSV386_COMPAT
15996 p = "{\t%1, %0|r\t%0, %1}";
15997 #else
15998 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15999 #endif
16001 else
16003 #if SYSV386_COMPAT
16004 p = "{r\t%2, %0|\t%0, %2}";
16005 #else
16006 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16007 #endif
16009 break;
16011 default:
16012 gcc_unreachable ();
16015 strcat (buf, p);
16016 return buf;
16019 /* Check if a 256bit AVX register is referenced inside of EXP. */
16021 static int
16022 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16024 rtx exp = *pexp;
16026 if (GET_CODE (exp) == SUBREG)
16027 exp = SUBREG_REG (exp);
16029 if (REG_P (exp)
16030 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16031 return 1;
16033 return 0;
16036 /* Return needed mode for entity in optimize_mode_switching pass. */
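/* A hedged summary of the AVX_U128 logic below: a call whose argument list
   uses a 256-bit (ymm) register needs the DIRTY state, a call with no such
   arguments can be entered CLEAN (so the mode-switching pass may place a
   vzeroupper before it), and any other insn that references a 256-bit
   register forces DIRTY.  */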
16038 static int
16039 ix86_avx_u128_mode_needed (rtx insn)
16041 if (CALL_P (insn))
16043 rtx link;
16045 /* Needed mode is set to AVX_U128_CLEAN if there are
16046 no 256bit modes used in function arguments. */
16047 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16048 link;
16049 link = XEXP (link, 1))
16051 if (GET_CODE (XEXP (link, 0)) == USE)
16053 rtx arg = XEXP (XEXP (link, 0), 0);
16055 if (ix86_check_avx256_register (&arg, NULL))
16056 return AVX_U128_DIRTY;
16060 return AVX_U128_CLEAN;
16063 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16064 changes state only when a 256bit register is written to, but we need
16065 to prevent the compiler from moving the optimal insertion point above
16066 an eventual read from a 256bit register. */
16067 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16068 return AVX_U128_DIRTY;
16070 return AVX_U128_ANY;
16073 /* Return mode that i387 must be switched into
16074 prior to the execution of insn. */
16076 static int
16077 ix86_i387_mode_needed (int entity, rtx insn)
16079 enum attr_i387_cw mode;
16081 /* The mode UNINITIALIZED is used to store the control word after a
16082 function call or ASM pattern. The mode ANY specifies that the function
16083 has no requirements on the control word and makes no changes in the
16084 bits we are interested in. */
16086 if (CALL_P (insn)
16087 || (NONJUMP_INSN_P (insn)
16088 && (asm_noperands (PATTERN (insn)) >= 0
16089 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16090 return I387_CW_UNINITIALIZED;
16092 if (recog_memoized (insn) < 0)
16093 return I387_CW_ANY;
16095 mode = get_attr_i387_cw (insn);
16097 switch (entity)
16099 case I387_TRUNC:
16100 if (mode == I387_CW_TRUNC)
16101 return mode;
16102 break;
16104 case I387_FLOOR:
16105 if (mode == I387_CW_FLOOR)
16106 return mode;
16107 break;
16109 case I387_CEIL:
16110 if (mode == I387_CW_CEIL)
16111 return mode;
16112 break;
16114 case I387_MASK_PM:
16115 if (mode == I387_CW_MASK_PM)
16116 return mode;
16117 break;
16119 default:
16120 gcc_unreachable ();
16123 return I387_CW_ANY;
16126 /* Return mode that entity must be switched into
16127 prior to the execution of insn. */
16130 ix86_mode_needed (int entity, rtx insn)
16132 switch (entity)
16134 case AVX_U128:
16135 return ix86_avx_u128_mode_needed (insn);
16136 case I387_TRUNC:
16137 case I387_FLOOR:
16138 case I387_CEIL:
16139 case I387_MASK_PM:
16140 return ix86_i387_mode_needed (entity, insn);
16141 default:
16142 gcc_unreachable ();
16144 return 0;
16147 /* Check if a 256bit AVX register is referenced in stores. */
16149 static void
16150 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16152 if (ix86_check_avx256_register (&dest, NULL))
16154 bool *used = (bool *) data;
16155 *used = true;
16159 /* Calculate mode of upper 128bit AVX registers after the insn. */
16161 static int
16162 ix86_avx_u128_mode_after (int mode, rtx insn)
16164 rtx pat = PATTERN (insn);
16166 if (vzeroupper_operation (pat, VOIDmode)
16167 || vzeroall_operation (pat, VOIDmode))
16168 return AVX_U128_CLEAN;
16170 /* We know that the state is clean after a CALL insn if no 256bit
16171 register is used for the function return value. */
16172 if (CALL_P (insn))
16174 bool avx_reg256_found = false;
16175 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16177 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16180 /* Otherwise, return current mode. Remember that if insn
16181 references AVX 256bit registers, the mode was already changed
16182 to DIRTY from MODE_NEEDED. */
16183 return mode;
16186 /* Return the mode that an insn results in. */
16189 ix86_mode_after (int entity, int mode, rtx insn)
16191 switch (entity)
16193 case AVX_U128:
16194 return ix86_avx_u128_mode_after (mode, insn);
16195 case I387_TRUNC:
16196 case I387_FLOOR:
16197 case I387_CEIL:
16198 case I387_MASK_PM:
16199 return mode;
16200 default:
16201 gcc_unreachable ();
16205 static int
16206 ix86_avx_u128_mode_entry (void)
16208 tree arg;
16210 /* Entry mode is set to AVX_U128_DIRTY if there are
16211 256bit modes used in function arguments. */
16212 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16213 arg = TREE_CHAIN (arg))
16215 rtx incoming = DECL_INCOMING_RTL (arg);
16217 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16218 return AVX_U128_DIRTY;
16221 return AVX_U128_CLEAN;
16224 /* Return a mode that ENTITY is assumed to be
16225 switched to at function entry. */
16228 ix86_mode_entry (int entity)
16230 switch (entity)
16232 case AVX_U128:
16233 return ix86_avx_u128_mode_entry ();
16234 case I387_TRUNC:
16235 case I387_FLOOR:
16236 case I387_CEIL:
16237 case I387_MASK_PM:
16238 return I387_CW_ANY;
16239 default:
16240 gcc_unreachable ();
16244 static int
16245 ix86_avx_u128_mode_exit (void)
16247 rtx reg = crtl->return_rtx;
16249 /* Exit mode is set to AVX_U128_DIRTY if there are
16250 256bit modes used in the function return register. */
16251 if (reg && ix86_check_avx256_register (&reg, NULL))
16252 return AVX_U128_DIRTY;
16254 return AVX_U128_CLEAN;
16257 /* Return a mode that ENTITY is assumed to be
16258 switched to at function exit. */
16261 ix86_mode_exit (int entity)
16263 switch (entity)
16265 case AVX_U128:
16266 return ix86_avx_u128_mode_exit ();
16267 case I387_TRUNC:
16268 case I387_FLOOR:
16269 case I387_CEIL:
16270 case I387_MASK_PM:
16271 return I387_CW_ANY;
16272 default:
16273 gcc_unreachable ();
16277 /* Output code to initialize control word copies used by trunc?f?i and
16278 rounding patterns. CURRENT_MODE is set to the current control word,
16279 while NEW_MODE is set to the new control word. */
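/* Background for the bit twiddling below (a summary of the x87 control
   word layout, not a full specification): bits 10-11 form the rounding
   control field (00 = to nearest, 01 = down, 10 = up, 11 = toward zero,
   so 0x0c00 selects truncation) and bit 5 (0x0020) masks the precision
   exception used for nearbyint().  */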
16281 static void
16282 emit_i387_cw_initialization (int mode)
16284 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16285 rtx new_mode;
16287 enum ix86_stack_slot slot;
16289 rtx reg = gen_reg_rtx (HImode);
16291 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16292 emit_move_insn (reg, copy_rtx (stored_mode));
16294 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16295 || optimize_insn_for_size_p ())
16297 switch (mode)
16299 case I387_CW_TRUNC:
16300 /* round toward zero (truncate) */
16301 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16302 slot = SLOT_CW_TRUNC;
16303 break;
16305 case I387_CW_FLOOR:
16306 /* round down toward -oo */
16307 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16308 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16309 slot = SLOT_CW_FLOOR;
16310 break;
16312 case I387_CW_CEIL:
16313 /* round up toward +oo */
16314 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16315 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16316 slot = SLOT_CW_CEIL;
16317 break;
16319 case I387_CW_MASK_PM:
16320 /* mask precision exception for nearbyint() */
16321 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16322 slot = SLOT_CW_MASK_PM;
16323 break;
16325 default:
16326 gcc_unreachable ();
16329 else
16331 switch (mode)
16333 case I387_CW_TRUNC:
16334 /* round toward zero (truncate) */
16335 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16336 slot = SLOT_CW_TRUNC;
16337 break;
16339 case I387_CW_FLOOR:
16340 /* round down toward -oo */
16341 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16342 slot = SLOT_CW_FLOOR;
16343 break;
16345 case I387_CW_CEIL:
16346 /* round up toward +oo */
16347 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16348 slot = SLOT_CW_CEIL;
16349 break;
16351 case I387_CW_MASK_PM:
16352 /* mask precision exception for nearbyint() */
16353 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16354 slot = SLOT_CW_MASK_PM;
16355 break;
16357 default:
16358 gcc_unreachable ();
16362 gcc_assert (slot < MAX_386_STACK_LOCALS);
16364 new_mode = assign_386_stack_local (HImode, slot);
16365 emit_move_insn (new_mode, reg);
16368 /* Emit vzeroupper. */
16370 void
16371 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16373 int i;
16375 /* Cancel automatic vzeroupper insertion if there are
16376 live call-saved SSE registers at the insertion point. */
16378 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16379 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16380 return;
16382 if (TARGET_64BIT)
16383 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16384 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16385 return;
16387 emit_insn (gen_avx_vzeroupper ());
16390 /* Generate one or more insns to set ENTITY to MODE. */
16392 void
16393 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16395 switch (entity)
16397 case AVX_U128:
16398 if (mode == AVX_U128_CLEAN)
16399 ix86_avx_emit_vzeroupper (regs_live);
16400 break;
16401 case I387_TRUNC:
16402 case I387_FLOOR:
16403 case I387_CEIL:
16404 case I387_MASK_PM:
16405 if (mode != I387_CW_ANY
16406 && mode != I387_CW_UNINITIALIZED)
16407 emit_i387_cw_initialization (mode);
16408 break;
16409 default:
16410 gcc_unreachable ();
16414 /* Output code for INSN to convert a float to a signed int. OPERANDS
16415 are the insn operands. The output may be [HSD]Imode and the input
16416 operand may be [SDX]Fmode. */
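/* Illustrative output (one possible sequence, not the only one): with a
   rounding-mode change and an SImode result this emits
     fldcw %3
     fistpl %0
     fldcw %2
   whereas the SSE3 fisttp path needs no control-word shuffling at all.  */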
16418 const char *
16419 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16421 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16422 int dimode_p = GET_MODE (operands[0]) == DImode;
16423 int round_mode = get_attr_i387_cw (insn);
16425 /* Jump through a hoop or two for DImode, since the hardware has no
16426 non-popping instruction. We used to do this a different way, but
16427 that was somewhat fragile and broke with post-reload splitters. */
16428 if ((dimode_p || fisttp) && !stack_top_dies)
16429 output_asm_insn ("fld\t%y1", operands);
16431 gcc_assert (STACK_TOP_P (operands[1]));
16432 gcc_assert (MEM_P (operands[0]));
16433 gcc_assert (GET_MODE (operands[1]) != TFmode);
16435 if (fisttp)
16436 output_asm_insn ("fisttp%Z0\t%0", operands);
16437 else
16439 if (round_mode != I387_CW_ANY)
16440 output_asm_insn ("fldcw\t%3", operands);
16441 if (stack_top_dies || dimode_p)
16442 output_asm_insn ("fistp%Z0\t%0", operands);
16443 else
16444 output_asm_insn ("fist%Z0\t%0", operands);
16445 if (round_mode != I387_CW_ANY)
16446 output_asm_insn ("fldcw\t%2", operands);
16449 return "";
16452 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16453 have the values zero or one, indicates the ffreep insn's operand
16454 from the OPERANDS array. */
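/* For reference (an encoding note, not something emitted elsewhere in this
   file): "ffreep %st(i)" encodes as the two bytes 0xdf, 0xc0+i, which is
   what the ASM_SHORT fallback below produces for assemblers that lack
   ffreep support.  */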
16456 static const char *
16457 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16459 if (TARGET_USE_FFREEP)
16460 #ifdef HAVE_AS_IX86_FFREEP
16461 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16462 #else
16464 static char retval[32];
16465 int regno = REGNO (operands[opno]);
16467 gcc_assert (STACK_REGNO_P (regno));
16469 regno -= FIRST_STACK_REG;
16471 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16472 return retval;
16474 #endif
16476 return opno ? "fstp\t%y1" : "fstp\t%y0";
16480 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16481 should be used. UNORDERED_P is true when fucom should be used. */
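/* Sketch of the table lookup at the end of this function: the index is
   built as (eflags_p << 3) | (integer operand << 2) | (unordered_p << 1)
   | stack_top_dies, so e.g. fcomi with a dying top of stack selects
   "fcomip\t{%y1, %0|%0, %y1}".  */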
16483 const char *
16484 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16486 int stack_top_dies;
16487 rtx cmp_op0, cmp_op1;
16488 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16490 if (eflags_p)
16492 cmp_op0 = operands[0];
16493 cmp_op1 = operands[1];
16495 else
16497 cmp_op0 = operands[1];
16498 cmp_op1 = operands[2];
16501 if (is_sse)
16503 if (GET_MODE (operands[0]) == SFmode)
16504 if (unordered_p)
16505 return "%vucomiss\t{%1, %0|%0, %1}";
16506 else
16507 return "%vcomiss\t{%1, %0|%0, %1}";
16508 else
16509 if (unordered_p)
16510 return "%vucomisd\t{%1, %0|%0, %1}";
16511 else
16512 return "%vcomisd\t{%1, %0|%0, %1}";
16515 gcc_assert (STACK_TOP_P (cmp_op0));
16517 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16519 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16521 if (stack_top_dies)
16523 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16524 return output_387_ffreep (operands, 1);
16526 else
16527 return "ftst\n\tfnstsw\t%0";
16530 if (STACK_REG_P (cmp_op1)
16531 && stack_top_dies
16532 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16533 && REGNO (cmp_op1) != FIRST_STACK_REG)
16535 /* If the top of the 387 stack dies, and the other operand is also
16536 a stack register that dies, then this must be an `fcompp' float
16537 compare. */
16539 if (eflags_p)
16541 /* There is no double popping fcomi variant. Fortunately,
16542 eflags is immune from the fstp's cc clobbering. */
16543 if (unordered_p)
16544 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16545 else
16546 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16547 return output_387_ffreep (operands, 0);
16549 else
16551 if (unordered_p)
16552 return "fucompp\n\tfnstsw\t%0";
16553 else
16554 return "fcompp\n\tfnstsw\t%0";
16557 else
16559 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16561 static const char * const alt[16] =
16563 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16564 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16565 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16566 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16568 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16569 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16570 NULL,
16571 NULL,
16573 "fcomi\t{%y1, %0|%0, %y1}",
16574 "fcomip\t{%y1, %0|%0, %y1}",
16575 "fucomi\t{%y1, %0|%0, %y1}",
16576 "fucomip\t{%y1, %0|%0, %y1}",
16578 NULL,
16579 NULL,
16580 NULL,
16581 NULL
16584 int mask;
16585 const char *ret;
16587 mask = eflags_p << 3;
16588 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16589 mask |= unordered_p << 1;
16590 mask |= stack_top_dies;
16592 gcc_assert (mask < 16);
16593 ret = alt[mask];
16594 gcc_assert (ret);
16596 return ret;
16600 void
16601 ix86_output_addr_vec_elt (FILE *file, int value)
16603 const char *directive = ASM_LONG;
16605 #ifdef ASM_QUAD
16606 if (TARGET_LP64)
16607 directive = ASM_QUAD;
16608 #else
16609 gcc_assert (!TARGET_64BIT);
16610 #endif
16612 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16615 void
16616 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16618 const char *directive = ASM_LONG;
16620 #ifdef ASM_QUAD
16621 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16622 directive = ASM_QUAD;
16623 #else
16624 gcc_assert (!TARGET_64BIT);
16625 #endif
16626 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16627 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16628 fprintf (file, "%s%s%d-%s%d\n",
16629 directive, LPREFIX, value, LPREFIX, rel);
16630 else if (HAVE_AS_GOTOFF_IN_DATA)
16631 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16632 #if TARGET_MACHO
16633 else if (TARGET_MACHO)
16635 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16636 machopic_output_function_base_name (file);
16637 putc ('\n', file);
16639 #endif
16640 else
16641 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16642 GOT_SYMBOL_NAME, LPREFIX, value);
16645 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16646 for the target. */
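/* E.g. clearing %ax is widened to SImode and normally emitted as
   "xorl %eax, %eax" with a flags-register clobber; on targets with
   TARGET_USE_MOV0 the plain "movl $0, %eax" form may be used instead.  */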
16648 void
16649 ix86_expand_clear (rtx dest)
16651 rtx tmp;
16653 /* We play register width games, which are only valid after reload. */
16654 gcc_assert (reload_completed);
16656 /* Avoid HImode and its attendant prefix byte. */
16657 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16658 dest = gen_rtx_REG (SImode, REGNO (dest));
16659 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16661 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16662 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16664 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16665 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16668 emit_insn (tmp);
16671 /* X is an unchanging MEM. If it is a constant pool reference, return
16672 the constant pool rtx, else NULL. */
16675 maybe_get_pool_constant (rtx x)
16677 x = ix86_delegitimize_address (XEXP (x, 0));
16679 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16680 return get_pool_constant (x);
16682 return NULL_RTX;
16685 void
16686 ix86_expand_move (enum machine_mode mode, rtx operands[])
16688 rtx op0, op1;
16689 enum tls_model model;
16691 op0 = operands[0];
16692 op1 = operands[1];
16694 if (GET_CODE (op1) == SYMBOL_REF)
16696 rtx tmp;
16698 model = SYMBOL_REF_TLS_MODEL (op1);
16699 if (model)
16701 op1 = legitimize_tls_address (op1, model, true);
16702 op1 = force_operand (op1, op0);
16703 if (op1 == op0)
16704 return;
16705 op1 = convert_to_mode (mode, op1, 1);
16707 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16708 op1 = tmp;
16710 else if (GET_CODE (op1) == CONST
16711 && GET_CODE (XEXP (op1, 0)) == PLUS
16712 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16714 rtx addend = XEXP (XEXP (op1, 0), 1);
16715 rtx symbol = XEXP (XEXP (op1, 0), 0);
16716 rtx tmp;
16718 model = SYMBOL_REF_TLS_MODEL (symbol);
16719 if (model)
16720 tmp = legitimize_tls_address (symbol, model, true);
16721 else
16722 tmp = legitimize_pe_coff_symbol (symbol, true);
16724 if (tmp)
16726 tmp = force_operand (tmp, NULL);
16727 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16728 op0, 1, OPTAB_DIRECT);
16729 if (tmp == op0)
16730 return;
16731 op1 = convert_to_mode (mode, tmp, 1);
16735 if ((flag_pic || MACHOPIC_INDIRECT)
16736 && symbolic_operand (op1, mode))
16738 if (TARGET_MACHO && !TARGET_64BIT)
16740 #if TARGET_MACHO
16741 /* dynamic-no-pic */
16742 if (MACHOPIC_INDIRECT)
16744 rtx temp = ((reload_in_progress
16745 || ((op0 && REG_P (op0))
16746 && mode == Pmode))
16747 ? op0 : gen_reg_rtx (Pmode));
16748 op1 = machopic_indirect_data_reference (op1, temp);
16749 if (MACHOPIC_PURE)
16750 op1 = machopic_legitimize_pic_address (op1, mode,
16751 temp == op1 ? 0 : temp);
16753 if (op0 != op1 && GET_CODE (op0) != MEM)
16755 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16756 emit_insn (insn);
16757 return;
16759 if (GET_CODE (op0) == MEM)
16760 op1 = force_reg (Pmode, op1);
16761 else
16763 rtx temp = op0;
16764 if (GET_CODE (temp) != REG)
16765 temp = gen_reg_rtx (Pmode);
16766 temp = legitimize_pic_address (op1, temp);
16767 if (temp == op0)
16768 return;
16769 op1 = temp;
16771 /* dynamic-no-pic */
16772 #endif
16774 else
16776 if (MEM_P (op0))
16777 op1 = force_reg (mode, op1);
16778 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16780 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16781 op1 = legitimize_pic_address (op1, reg);
16782 if (op0 == op1)
16783 return;
16784 op1 = convert_to_mode (mode, op1, 1);
16788 else
16790 if (MEM_P (op0)
16791 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16792 || !push_operand (op0, mode))
16793 && MEM_P (op1))
16794 op1 = force_reg (mode, op1);
16796 if (push_operand (op0, mode)
16797 && ! general_no_elim_operand (op1, mode))
16798 op1 = copy_to_mode_reg (mode, op1);
16800 /* Force large constants in 64-bit compilation into a register
16801 to get them CSEd. */
16802 if (can_create_pseudo_p ()
16803 && (mode == DImode) && TARGET_64BIT
16804 && immediate_operand (op1, mode)
16805 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16806 && !register_operand (op0, mode)
16807 && optimize)
16808 op1 = copy_to_mode_reg (mode, op1);
16810 if (can_create_pseudo_p ()
16811 && FLOAT_MODE_P (mode)
16812 && GET_CODE (op1) == CONST_DOUBLE)
16814 /* If we are loading a floating point constant to a register,
16815 force the value to memory now, since we'll get better code
16816 out of the back end. */
16818 op1 = validize_mem (force_const_mem (mode, op1));
16819 if (!register_operand (op0, mode))
16821 rtx temp = gen_reg_rtx (mode);
16822 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16823 emit_move_insn (op0, temp);
16824 return;
16829 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16832 void
16833 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16835 rtx op0 = operands[0], op1 = operands[1];
16836 unsigned int align = GET_MODE_ALIGNMENT (mode);
16838 if (push_operand (op0, VOIDmode))
16839 op0 = emit_move_resolve_push (mode, op0);
16841 /* Force constants other than zero into memory. We do not know how
16842 the instructions used to build constants modify the upper 64 bits
16843 of the register; once we have that information we may be able
16844 to handle some of them more efficiently. */
16845 if (can_create_pseudo_p ()
16846 && register_operand (op0, mode)
16847 && (CONSTANT_P (op1)
16848 || (GET_CODE (op1) == SUBREG
16849 && CONSTANT_P (SUBREG_REG (op1))))
16850 && !standard_sse_constant_p (op1))
16851 op1 = validize_mem (force_const_mem (mode, op1));
16853 /* We need to check memory alignment for SSE modes since an attribute
16854 can make operands unaligned. */
16855 if (can_create_pseudo_p ()
16856 && SSE_REG_MODE_P (mode)
16857 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16858 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16860 rtx tmp[2];
16862 /* ix86_expand_vector_move_misalign() does not like constants ... */
16863 if (CONSTANT_P (op1)
16864 || (GET_CODE (op1) == SUBREG
16865 && CONSTANT_P (SUBREG_REG (op1))))
16866 op1 = validize_mem (force_const_mem (mode, op1));
16868 /* ... nor both arguments in memory. */
16869 if (!register_operand (op0, mode)
16870 && !register_operand (op1, mode))
16871 op1 = force_reg (mode, op1);
16873 tmp[0] = op0; tmp[1] = op1;
16874 ix86_expand_vector_move_misalign (mode, tmp);
16875 return;
16878 /* Make operand1 a register if it isn't already. */
16879 if (can_create_pseudo_p ()
16880 && !register_operand (op0, mode)
16881 && !register_operand (op1, mode))
16883 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16884 return;
16887 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16890 /* Split 32-byte AVX unaligned load and store if needed. */
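/* E.g. with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a misaligned V8SFmode
   load is emitted roughly as a 16-byte "vmovups" of the low half
   followed by a "vinsertf128" of the high half; a split store uses two
   "vextractf128" stores instead.  */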
16892 static void
16893 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16895 rtx m;
16896 rtx (*extract) (rtx, rtx, rtx);
16897 rtx (*load_unaligned) (rtx, rtx);
16898 rtx (*store_unaligned) (rtx, rtx);
16899 enum machine_mode mode;
16901 switch (GET_MODE (op0))
16903 default:
16904 gcc_unreachable ();
16905 case V32QImode:
16906 extract = gen_avx_vextractf128v32qi;
16907 load_unaligned = gen_avx_loaddquv32qi;
16908 store_unaligned = gen_avx_storedquv32qi;
16909 mode = V16QImode;
16910 break;
16911 case V8SFmode:
16912 extract = gen_avx_vextractf128v8sf;
16913 load_unaligned = gen_avx_loadups256;
16914 store_unaligned = gen_avx_storeups256;
16915 mode = V4SFmode;
16916 break;
16917 case V4DFmode:
16918 extract = gen_avx_vextractf128v4df;
16919 load_unaligned = gen_avx_loadupd256;
16920 store_unaligned = gen_avx_storeupd256;
16921 mode = V2DFmode;
16922 break;
16925 if (MEM_P (op1))
16927 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16929 rtx r = gen_reg_rtx (mode);
16930 m = adjust_address (op1, mode, 0);
16931 emit_move_insn (r, m);
16932 m = adjust_address (op1, mode, 16);
16933 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16934 emit_move_insn (op0, r);
16936 /* Normal *mov<mode>_internal pattern will handle
16937 unaligned loads just fine if misaligned_operand
16938 is true, and without the UNSPEC it can be combined
16939 with arithmetic instructions. */
16940 else if (misaligned_operand (op1, GET_MODE (op1)))
16941 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16942 else
16943 emit_insn (load_unaligned (op0, op1));
16945 else if (MEM_P (op0))
16947 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16949 m = adjust_address (op0, mode, 0);
16950 emit_insn (extract (m, op1, const0_rtx));
16951 m = adjust_address (op0, mode, 16);
16952 emit_insn (extract (m, op1, const1_rtx));
16954 else
16955 emit_insn (store_unaligned (op0, op1));
16957 else
16958 gcc_unreachable ();
16961 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16962 straight to ix86_expand_vector_move. */
16963 /* Code generation for scalar reg-reg moves of single and double precision data:
16964 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16965 movaps reg, reg
16966 else
16967 movss reg, reg
16968 if (x86_sse_partial_reg_dependency == true)
16969 movapd reg, reg
16970 else
16971 movsd reg, reg
16973 Code generation for scalar loads of double precision data:
16974 if (x86_sse_split_regs == true)
16975 movlpd mem, reg (gas syntax)
16976 else
16977 movsd mem, reg
16979 Code generation for unaligned packed loads of single precision data
16980 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16981 if (x86_sse_unaligned_move_optimal)
16982 movups mem, reg
16984 if (x86_sse_partial_reg_dependency == true)
16986 xorps reg, reg
16987 movlps mem, reg
16988 movhps mem+8, reg
16990 else
16992 movlps mem, reg
16993 movhps mem+8, reg
16996 Code generation for unaligned packed loads of double precision data
16997 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16998 if (x86_sse_unaligned_move_optimal)
16999 movupd mem, reg
17001 if (x86_sse_split_regs == true)
17003 movlpd mem, reg
17004 movhpd mem+8, reg
17006 else
17008 movsd mem, reg
17009 movhpd mem+8, reg
17013 void
17014 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17016 rtx op0, op1, orig_op0 = NULL_RTX, m;
17017 rtx (*load_unaligned) (rtx, rtx);
17018 rtx (*store_unaligned) (rtx, rtx);
17020 op0 = operands[0];
17021 op1 = operands[1];
17023 if (GET_MODE_SIZE (mode) == 64)
17025 switch (GET_MODE_CLASS (mode))
17027 case MODE_VECTOR_INT:
17028 case MODE_INT:
17029 if (GET_MODE (op0) != V16SImode)
17031 if (!MEM_P (op0))
17033 orig_op0 = op0;
17034 op0 = gen_reg_rtx (V16SImode);
17036 else
17037 op0 = gen_lowpart (V16SImode, op0);
17039 op1 = gen_lowpart (V16SImode, op1);
17040 /* FALLTHRU */
17042 case MODE_VECTOR_FLOAT:
17043 switch (GET_MODE (op0))
17045 default:
17046 gcc_unreachable ();
17047 case V16SImode:
17048 load_unaligned = gen_avx512f_loaddquv16si;
17049 store_unaligned = gen_avx512f_storedquv16si;
17050 break;
17051 case V16SFmode:
17052 load_unaligned = gen_avx512f_loadups512;
17053 store_unaligned = gen_avx512f_storeups512;
17054 break;
17055 case V8DFmode:
17056 load_unaligned = gen_avx512f_loadupd512;
17057 store_unaligned = gen_avx512f_storeupd512;
17058 break;
17061 if (MEM_P (op1))
17062 emit_insn (load_unaligned (op0, op1));
17063 else if (MEM_P (op0))
17064 emit_insn (store_unaligned (op0, op1));
17065 else
17066 gcc_unreachable ();
17067 if (orig_op0)
17068 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17069 break;
17071 default:
17072 gcc_unreachable ();
17075 return;
17078 if (TARGET_AVX
17079 && GET_MODE_SIZE (mode) == 32)
17081 switch (GET_MODE_CLASS (mode))
17083 case MODE_VECTOR_INT:
17084 case MODE_INT:
17085 if (GET_MODE (op0) != V32QImode)
17087 if (!MEM_P (op0))
17089 orig_op0 = op0;
17090 op0 = gen_reg_rtx (V32QImode);
17092 else
17093 op0 = gen_lowpart (V32QImode, op0);
17095 op1 = gen_lowpart (V32QImode, op1);
17096 /* FALLTHRU */
17098 case MODE_VECTOR_FLOAT:
17099 ix86_avx256_split_vector_move_misalign (op0, op1);
17100 if (orig_op0)
17101 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17102 break;
17104 default:
17105 gcc_unreachable ();
17108 return;
17111 if (MEM_P (op1))
17113 /* Normal *mov<mode>_internal pattern will handle
17114 unaligned loads just fine if misaligned_operand
17115 is true, and without the UNSPEC it can be combined
17116 with arithmetic instructions. */
17117 if (TARGET_AVX
17118 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17119 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17120 && misaligned_operand (op1, GET_MODE (op1)))
17121 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17122 /* ??? If we have typed data, then it would appear that using
17123 movdqu is the only way to get unaligned data loaded with
17124 integer type. */
17125 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17127 if (GET_MODE (op0) != V16QImode)
17129 orig_op0 = op0;
17130 op0 = gen_reg_rtx (V16QImode);
17132 op1 = gen_lowpart (V16QImode, op1);
17133 /* We will eventually emit movups based on insn attributes. */
17134 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17135 if (orig_op0)
17136 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17138 else if (TARGET_SSE2 && mode == V2DFmode)
17140 rtx zero;
17142 if (TARGET_AVX
17143 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17144 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17145 || optimize_insn_for_size_p ())
17147 /* We will eventually emit movups based on insn attributes. */
17148 emit_insn (gen_sse2_loadupd (op0, op1));
17149 return;
17152 /* When SSE registers are split into halves, we can avoid
17153 writing to the top half twice. */
17154 if (TARGET_SSE_SPLIT_REGS)
17156 emit_clobber (op0);
17157 zero = op0;
17159 else
17161 /* ??? Not sure about the best option for the Intel chips.
17162 The following would seem to satisfy; the register is
17163 entirely cleared, breaking the dependency chain. We
17164 then store to the upper half, with a dependency depth
17165 of one. A rumor has it that Intel recommends two movsd
17166 followed by an unpacklpd, but this is unconfirmed. And
17167 given that the dependency depth of the unpacklpd would
17168 still be one, I'm not sure why this would be better. */
17169 zero = CONST0_RTX (V2DFmode);
17172 m = adjust_address (op1, DFmode, 0);
17173 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17174 m = adjust_address (op1, DFmode, 8);
17175 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17177 else
17179 rtx t;
17181 if (TARGET_AVX
17182 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17183 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17184 || optimize_insn_for_size_p ())
17186 if (GET_MODE (op0) != V4SFmode)
17188 orig_op0 = op0;
17189 op0 = gen_reg_rtx (V4SFmode);
17191 op1 = gen_lowpart (V4SFmode, op1);
17192 emit_insn (gen_sse_loadups (op0, op1));
17193 if (orig_op0)
17194 emit_move_insn (orig_op0,
17195 gen_lowpart (GET_MODE (orig_op0), op0));
17196 return;
17199 if (mode != V4SFmode)
17200 t = gen_reg_rtx (V4SFmode);
17201 else
17202 t = op0;
17204 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17205 emit_move_insn (t, CONST0_RTX (V4SFmode));
17206 else
17207 emit_clobber (t);
17209 m = adjust_address (op1, V2SFmode, 0);
17210 emit_insn (gen_sse_loadlps (t, t, m));
17211 m = adjust_address (op1, V2SFmode, 8);
17212 emit_insn (gen_sse_loadhps (t, t, m));
17213 if (mode != V4SFmode)
17214 emit_move_insn (op0, gen_lowpart (mode, t));
17217 else if (MEM_P (op0))
17219 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17221 op0 = gen_lowpart (V16QImode, op0);
17222 op1 = gen_lowpart (V16QImode, op1);
17223 /* We will eventually emit movups based on insn attributes. */
17224 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17226 else if (TARGET_SSE2 && mode == V2DFmode)
17228 if (TARGET_AVX
17229 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17230 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17231 || optimize_insn_for_size_p ())
17232 /* We will eventually emit movups based on insn attributes. */
17233 emit_insn (gen_sse2_storeupd (op0, op1));
17234 else
17236 m = adjust_address (op0, DFmode, 0);
17237 emit_insn (gen_sse2_storelpd (m, op1));
17238 m = adjust_address (op0, DFmode, 8);
17239 emit_insn (gen_sse2_storehpd (m, op1));
17242 else
17244 if (mode != V4SFmode)
17245 op1 = gen_lowpart (V4SFmode, op1);
17247 if (TARGET_AVX
17248 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17249 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17250 || optimize_insn_for_size_p ())
17252 op0 = gen_lowpart (V4SFmode, op0);
17253 emit_insn (gen_sse_storeups (op0, op1));
17255 else
17257 m = adjust_address (op0, V2SFmode, 0);
17258 emit_insn (gen_sse_storelps (m, op1));
17259 m = adjust_address (op0, V2SFmode, 8);
17260 emit_insn (gen_sse_storehps (m, op1));
17264 else
17265 gcc_unreachable ();
17268 /* Helper function of ix86_fixup_binary_operands to canonicalize
17269 operand order. Returns true if the operands should be swapped. */
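/* E.g. for "a = b + a" this returns true, so that the destination can
   match the first source operand; for "a = a + b" it returns false.  */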
17271 static bool
17272 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17273 rtx operands[])
17275 rtx dst = operands[0];
17276 rtx src1 = operands[1];
17277 rtx src2 = operands[2];
17279 /* If the operation is not commutative, we can't do anything. */
17280 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17281 return false;
17283 /* Highest priority is that src1 should match dst. */
17284 if (rtx_equal_p (dst, src1))
17285 return false;
17286 if (rtx_equal_p (dst, src2))
17287 return true;
17289 /* Next highest priority is that immediate constants come second. */
17290 if (immediate_operand (src2, mode))
17291 return false;
17292 if (immediate_operand (src1, mode))
17293 return true;
17295 /* Lowest priority is that memory references should come second. */
17296 if (MEM_P (src2))
17297 return false;
17298 if (MEM_P (src1))
17299 return true;
17301 return false;
17305 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17306 destination to use for the operation. If different from the true
17307 destination in operands[0], a copy operation will be required. */
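/* E.g. for "mem1 = mem2 + mem3" with three distinct addresses, src1 is
   loaded into a register and a fresh pseudo is returned as the
   destination, which must then be copied back into mem1.  */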
17310 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17311 rtx operands[])
17313 rtx dst = operands[0];
17314 rtx src1 = operands[1];
17315 rtx src2 = operands[2];
17317 /* Canonicalize operand order. */
17318 if (ix86_swap_binary_operands_p (code, mode, operands))
17320 rtx temp;
17322 /* It is invalid to swap operands of different modes. */
17323 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17325 temp = src1;
17326 src1 = src2;
17327 src2 = temp;
17330 /* Both source operands cannot be in memory. */
17331 if (MEM_P (src1) && MEM_P (src2))
17333 /* Optimization: Only read from memory once. */
17334 if (rtx_equal_p (src1, src2))
17336 src2 = force_reg (mode, src2);
17337 src1 = src2;
17339 else if (rtx_equal_p (dst, src1))
17340 src2 = force_reg (mode, src2);
17341 else
17342 src1 = force_reg (mode, src1);
17345 /* If the destination is memory, and we do not have matching source
17346 operands, do things in registers. */
17347 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17348 dst = gen_reg_rtx (mode);
17350 /* Source 1 cannot be a constant. */
17351 if (CONSTANT_P (src1))
17352 src1 = force_reg (mode, src1);
17354 /* Source 1 cannot be a non-matching memory. */
17355 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17356 src1 = force_reg (mode, src1);
17358 /* Improve address combine. */
17359 if (code == PLUS
17360 && GET_MODE_CLASS (mode) == MODE_INT
17361 && MEM_P (src2))
17362 src2 = force_reg (mode, src2);
17364 operands[1] = src1;
17365 operands[2] = src2;
17366 return dst;
17369 /* Similarly, but assume that the destination has already been
17370 set up properly. */
17372 void
17373 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17374 enum machine_mode mode, rtx operands[])
17376 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17377 gcc_assert (dst == operands[0]);
17380 /* Attempt to expand a binary operator. Make the expansion closer to the
17381 actual machine than just general_operand, which would allow 3 separate
17382 memory references (one output, two inputs) in a single insn. */
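/* E.g. at expand time "a = a + b" is emitted as a PLUS with the
   flags-register clobber attached; only after reload is a non-matching
   "r0 = r1 + r2" emitted without the clobber, so that it can stay an
   lea.  */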
17384 void
17385 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17386 rtx operands[])
17388 rtx src1, src2, dst, op, clob;
17390 dst = ix86_fixup_binary_operands (code, mode, operands);
17391 src1 = operands[1];
17392 src2 = operands[2];
17394 /* Emit the instruction. */
17396 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17397 if (reload_in_progress)
17399 /* Reload doesn't know about the flags register, and doesn't know that
17400 it doesn't want to clobber it. We can only do this with PLUS. */
17401 gcc_assert (code == PLUS);
17402 emit_insn (op);
17404 else if (reload_completed
17405 && code == PLUS
17406 && !rtx_equal_p (dst, src1))
17408 /* This is going to be an LEA; avoid splitting it later. */
17409 emit_insn (op);
17411 else
17413 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17414 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17417 /* Fix up the destination if needed. */
17418 if (dst != operands[0])
17419 emit_move_insn (operands[0], dst);
17422 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17423 the given OPERANDS. */
17425 void
17426 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17427 rtx operands[])
17429 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17430 if (GET_CODE (operands[1]) == SUBREG)
17432 op1 = operands[1];
17433 op2 = operands[2];
17435 else if (GET_CODE (operands[2]) == SUBREG)
17437 op1 = operands[2];
17438 op2 = operands[1];
17440 /* Optimize (__m128i) d | (__m128i) e and similar code
17441 when d and e are float vectors into a float vector logical
17442 insn. In C/C++ without using intrinsics there is no other way
17443 to express a vector logical operation on float vectors than
17444 to cast them temporarily to integer vectors. */
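/* E.g. "(__m128i) u | (__m128i) v" with u and v of type __m128d can be
   emitted as a single "orpd" on the V2DFmode values instead of a "por"
   in the integer vector domain.  */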
17445 if (op1
17446 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17447 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17448 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17449 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17450 && SUBREG_BYTE (op1) == 0
17451 && (GET_CODE (op2) == CONST_VECTOR
17452 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17453 && SUBREG_BYTE (op2) == 0))
17454 && can_create_pseudo_p ())
17456 rtx dst;
17457 switch (GET_MODE (SUBREG_REG (op1)))
17459 case V4SFmode:
17460 case V8SFmode:
17461 case V2DFmode:
17462 case V4DFmode:
17463 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17464 if (GET_CODE (op2) == CONST_VECTOR)
17466 op2 = gen_lowpart (GET_MODE (dst), op2);
17467 op2 = force_reg (GET_MODE (dst), op2);
17469 else
17471 op1 = operands[1];
17472 op2 = SUBREG_REG (operands[2]);
17473 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17474 op2 = force_reg (GET_MODE (dst), op2);
17476 op1 = SUBREG_REG (op1);
17477 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17478 op1 = force_reg (GET_MODE (dst), op1);
17479 emit_insn (gen_rtx_SET (VOIDmode, dst,
17480 gen_rtx_fmt_ee (code, GET_MODE (dst),
17481 op1, op2)));
17482 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17483 return;
17484 default:
17485 break;
17488 if (!nonimmediate_operand (operands[1], mode))
17489 operands[1] = force_reg (mode, operands[1]);
17490 if (!nonimmediate_operand (operands[2], mode))
17491 operands[2] = force_reg (mode, operands[2]);
17492 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17493 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17494 gen_rtx_fmt_ee (code, mode, operands[1],
17495 operands[2])));
17498 /* Return TRUE or FALSE depending on whether the binary operator meets the
17499 appropriate constraints. */
17501 bool
17502 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17503 rtx operands[3])
17505 rtx dst = operands[0];
17506 rtx src1 = operands[1];
17507 rtx src2 = operands[2];
17509 /* Both source operands cannot be in memory. */
17510 if (MEM_P (src1) && MEM_P (src2))
17511 return false;
17513 /* Canonicalize operand order for commutative operators. */
17514 if (ix86_swap_binary_operands_p (code, mode, operands))
17516 rtx temp = src1;
17517 src1 = src2;
17518 src2 = temp;
17521 /* If the destination is memory, we must have a matching source operand. */
17522 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17523 return false;
17525 /* Source 1 cannot be a constant. */
17526 if (CONSTANT_P (src1))
17527 return false;
17529 /* Source 1 cannot be a non-matching memory. */
17530 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17531 /* Support "andhi/andsi/anddi" as a zero-extending move. */
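/* E.g. "reg = mem & 0xffff" is allowed even though the memory source
   does not match the destination, since it is really a zero-extending
   movzwl load.  */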
17532 return (code == AND
17533 && (mode == HImode
17534 || mode == SImode
17535 || (TARGET_64BIT && mode == DImode))
17536 && satisfies_constraint_L (src2));
17538 return true;
17541 /* Attempt to expand a unary operator. Make the expansion closer to the
17542 actual machine than just general_operand, which would allow 2 separate
17543 memory references (one output, one input) in a single insn. */
17545 void
17546 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17547 rtx operands[])
17549 int matching_memory;
17550 rtx src, dst, op, clob;
17552 dst = operands[0];
17553 src = operands[1];
17555 /* If the destination is memory, and we do not have matching source
17556 operands, do things in registers. */
17557 matching_memory = 0;
17558 if (MEM_P (dst))
17560 if (rtx_equal_p (dst, src))
17561 matching_memory = 1;
17562 else
17563 dst = gen_reg_rtx (mode);
17566 /* When source operand is memory, destination must match. */
17567 if (MEM_P (src) && !matching_memory)
17568 src = force_reg (mode, src);
17570 /* Emit the instruction. */
17572 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17573 if (reload_in_progress || code == NOT)
17575 /* Reload doesn't know about the flags register, and doesn't know that
17576 it doesn't want to clobber it. */
17577 gcc_assert (code == NOT);
17578 emit_insn (op);
17580 else
17582 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17583 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17586 /* Fix up the destination if needed. */
17587 if (dst != operands[0])
17588 emit_move_insn (operands[0], dst);
17591 /* Split a 32-bit/64-bit divmod, using an 8-bit unsigned divmod if the dividend and
17592 divisor are within the range [0-255]. */
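/* E.g. for a 32-bit unsigned division this emits a test of
   (dividend | divisor) against ~0xff; if no high bits are set it
   branches to an 8-bit "divb" sequence, otherwise the full-width
   divide is used.  */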
17594 void
17595 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17596 bool signed_p)
17598 rtx end_label, qimode_label;
17599 rtx insn, div, mod;
17600 rtx scratch, tmp0, tmp1, tmp2;
17601 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17602 rtx (*gen_zero_extend) (rtx, rtx);
17603 rtx (*gen_test_ccno_1) (rtx, rtx);
17605 switch (mode)
17607 case SImode:
17608 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17609 gen_test_ccno_1 = gen_testsi_ccno_1;
17610 gen_zero_extend = gen_zero_extendqisi2;
17611 break;
17612 case DImode:
17613 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17614 gen_test_ccno_1 = gen_testdi_ccno_1;
17615 gen_zero_extend = gen_zero_extendqidi2;
17616 break;
17617 default:
17618 gcc_unreachable ();
17621 end_label = gen_label_rtx ();
17622 qimode_label = gen_label_rtx ();
17624 scratch = gen_reg_rtx (mode);
17626 /* Use 8-bit unsigned divmod if dividend and divisor are within
17627 the range [0-255]. */
17628 emit_move_insn (scratch, operands[2]);
17629 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17630 scratch, 1, OPTAB_DIRECT);
17631 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17632 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17633 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17634 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17635 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17636 pc_rtx);
17637 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17638 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17639 JUMP_LABEL (insn) = qimode_label;
17641 /* Generate the original signed/unsigned divmod. */
17642 div = gen_divmod4_1 (operands[0], operands[1],
17643 operands[2], operands[3]);
17644 emit_insn (div);
17646 /* Branch to the end. */
17647 emit_jump_insn (gen_jump (end_label));
17648 emit_barrier ();
17650 /* Generate 8bit unsigned divide. */
17651 emit_label (qimode_label);
17652 /* Don't use operands[0] for result of 8bit divide since not all
17653 registers support QImode ZERO_EXTRACT. */
17654 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17655 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17656 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17657 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17659 if (signed_p)
17661 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17662 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17664 else
17666 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17667 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17670 /* Extract remainder from AH. */
17671 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17672 if (REG_P (operands[1]))
17673 insn = emit_move_insn (operands[1], tmp1);
17674 else
17676 /* Need a new scratch register since the old one has result
17677 of 8bit divide. */
17678 scratch = gen_reg_rtx (mode);
17679 emit_move_insn (scratch, tmp1);
17680 insn = emit_move_insn (operands[1], scratch);
17682 set_unique_reg_note (insn, REG_EQUAL, mod);
17684 /* Zero extend quotient from AL. */
17685 tmp1 = gen_lowpart (QImode, tmp0);
17686 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17687 set_unique_reg_note (insn, REG_EQUAL, div);
17689 emit_label (end_label);
17692 /* Whether it is OK to emit CFI directives when emitting asm code. */
17694 bool
17695 ix86_emit_cfi ()
17697 return dwarf2out_do_cfi_asm ();
17700 #define LEA_MAX_STALL (3)
17701 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17703 /* Increase given DISTANCE in half-cycles according to
17704 dependencies between PREV and NEXT instructions.
17705 Add 1 half-cycle if there is no dependency and
17706 go to the next cycle if there is some dependency. */
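/* E.g. from an odd DISTANCE of 3, a dependent PREV/NEXT pair advances
   to 6 (round up to the cycle boundary, then one more cycle), while an
   independent pair advances to 4.  */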
17708 static unsigned int
17709 increase_distance (rtx prev, rtx next, unsigned int distance)
17711 df_ref *use_rec;
17712 df_ref *def_rec;
17714 if (!prev || !next)
17715 return distance + (distance & 1) + 2;
17717 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17718 return distance + 1;
17720 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17721 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17722 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17723 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17724 return distance + (distance & 1) + 2;
17726 return distance + 1;
17729 /* Function checks if instruction INSN defines register number
17730 REGNO1 or REGNO2. */
17732 static bool
17733 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17734 rtx insn)
17736 df_ref *def_rec;
17738 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17739 if (DF_REF_REG_DEF_P (*def_rec)
17740 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17741 && (regno1 == DF_REF_REGNO (*def_rec)
17742 || regno2 == DF_REF_REGNO (*def_rec)))
17744 return true;
17747 return false;
17750 /* Function checks if instruction INSN uses register number
17751 REGNO as part of an address expression. */
17753 static bool
17754 insn_uses_reg_mem (unsigned int regno, rtx insn)
17756 df_ref *use_rec;
17758 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17759 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17760 return true;
17762 return false;
17765 /* Search backward for a non-agu definition of register number REGNO1
17766 or register number REGNO2 in the basic block, starting from instruction
17767 START up to the head of the basic block or instruction INSN.
17769 Sets *FOUND to true if a definition was found
17770 and to false otherwise.
17772 The distance in half-cycles between START and the found instruction or
17773 the head of the BB is added to DISTANCE and returned. */
17775 static int
17776 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17777 rtx insn, int distance,
17778 rtx start, bool *found)
17780 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17781 rtx prev = start;
17782 rtx next = NULL;
17784 *found = false;
17786 while (prev
17787 && prev != insn
17788 && distance < LEA_SEARCH_THRESHOLD)
17790 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17792 distance = increase_distance (prev, next, distance);
17793 if (insn_defines_reg (regno1, regno2, prev))
17795 if (recog_memoized (prev) < 0
17796 || get_attr_type (prev) != TYPE_LEA)
17798 *found = true;
17799 return distance;
17803 next = prev;
17805 if (prev == BB_HEAD (bb))
17806 break;
17808 prev = PREV_INSN (prev);
17811 return distance;
17814 /* Search backward for non-agu definition of register number REGNO1
17815 or register number REGNO2 in INSN's basic block until
17816 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17817 2. Reach a neighbouring BB's boundary, or
17818 3. Reach an agu definition.
17819 Returns the distance between the non-agu definition point and INSN.
17820 If no definition point, returns -1. */
17822 static int
17823 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17824 rtx insn)
17826 basic_block bb = BLOCK_FOR_INSN (insn);
17827 int distance = 0;
17828 bool found = false;
17830 if (insn != BB_HEAD (bb))
17831 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17832 distance, PREV_INSN (insn),
17833 &found);
17835 if (!found && distance < LEA_SEARCH_THRESHOLD)
17837 edge e;
17838 edge_iterator ei;
17839 bool simple_loop = false;
17841 FOR_EACH_EDGE (e, ei, bb->preds)
17842 if (e->src == bb)
17844 simple_loop = true;
17845 break;
17848 if (simple_loop)
17849 distance = distance_non_agu_define_in_bb (regno1, regno2,
17850 insn, distance,
17851 BB_END (bb), &found);
17852 else
17854 int shortest_dist = -1;
17855 bool found_in_bb = false;
17857 FOR_EACH_EDGE (e, ei, bb->preds)
17859 int bb_dist
17860 = distance_non_agu_define_in_bb (regno1, regno2,
17861 insn, distance,
17862 BB_END (e->src),
17863 &found_in_bb);
17864 if (found_in_bb)
17866 if (shortest_dist < 0)
17867 shortest_dist = bb_dist;
17868 else if (bb_dist > 0)
17869 shortest_dist = MIN (bb_dist, shortest_dist);
17871 found = true;
17875 distance = shortest_dist;
17879 /* get_attr_type may modify recog data. We want to make sure
17880 that recog data is valid for instruction INSN, on which
17881 distance_non_agu_define is called. INSN is unchanged here. */
17882 extract_insn_cached (insn);
17884 if (!found)
17885 return -1;
17887 return distance >> 1;
17890 /* Return the distance in half-cycles between INSN and the next
17891 insn that uses register number REGNO in a memory address, added
17892 to DISTANCE. Return -1 if REGNO0 is set.
17894 Put true value into *FOUND if register usage was found and
17895 false otherwise.
17896 Put true value into *REDEFINED if register redefinition was
17897 found and false otherwise. */
17899 static int
17900 distance_agu_use_in_bb (unsigned int regno,
17901 rtx insn, int distance, rtx start,
17902 bool *found, bool *redefined)
17904 basic_block bb = NULL;
17905 rtx next = start;
17906 rtx prev = NULL;
17908 *found = false;
17909 *redefined = false;
17911 if (start != NULL_RTX)
17913 bb = BLOCK_FOR_INSN (start);
17914 if (start != BB_HEAD (bb))
17915 /* If insn and start belong to the same bb, set prev to insn,
17916 so the call to increase_distance will increase the distance
17917 between insns by 1. */
17918 prev = insn;
17921 while (next
17922 && next != insn
17923 && distance < LEA_SEARCH_THRESHOLD)
17925 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17927 distance = increase_distance(prev, next, distance);
17928 if (insn_uses_reg_mem (regno, next))
17930 /* Return DISTANCE if OP0 is used in memory
17931 address in NEXT. */
17932 *found = true;
17933 return distance;
17936 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17938 /* Return -1 if OP0 is set in NEXT. */
17939 *redefined = true;
17940 return -1;
17943 prev = next;
17946 if (next == BB_END (bb))
17947 break;
17949 next = NEXT_INSN (next);
17952 return distance;
17955 /* Return the distance between INSN and the next insn that uses
17956 register number REGNO0 in a memory address. Return -1 if no such
17957 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17959 static int
17960 distance_agu_use (unsigned int regno0, rtx insn)
17962 basic_block bb = BLOCK_FOR_INSN (insn);
17963 int distance = 0;
17964 bool found = false;
17965 bool redefined = false;
17967 if (insn != BB_END (bb))
17968 distance = distance_agu_use_in_bb (regno0, insn, distance,
17969 NEXT_INSN (insn),
17970 &found, &redefined);
17972 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17974 edge e;
17975 edge_iterator ei;
17976 bool simple_loop = false;
17978 FOR_EACH_EDGE (e, ei, bb->succs)
17979 if (e->dest == bb)
17981 simple_loop = true;
17982 break;
17985 if (simple_loop)
17986 distance = distance_agu_use_in_bb (regno0, insn,
17987 distance, BB_HEAD (bb),
17988 &found, &redefined);
17989 else
17991 int shortest_dist = -1;
17992 bool found_in_bb = false;
17993 bool redefined_in_bb = false;
17995 FOR_EACH_EDGE (e, ei, bb->succs)
17997 int bb_dist
17998 = distance_agu_use_in_bb (regno0, insn,
17999 distance, BB_HEAD (e->dest),
18000 &found_in_bb, &redefined_in_bb);
18001 if (found_in_bb)
18003 if (shortest_dist < 0)
18004 shortest_dist = bb_dist;
18005 else if (bb_dist > 0)
18006 shortest_dist = MIN (bb_dist, shortest_dist);
18008 found = true;
18012 distance = shortest_dist;
18016 if (!found || redefined)
18017 return -1;
18019 return distance >> 1;
18022 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18023 there is a dilemma of choosing LEA or ADD.
18024 Negative value: ADD is preferred over LEA
18025 Zero: Neutral
18026 Positive value: LEA is preferred over ADD. */
18027 #define IX86_LEA_PRIORITY 0
18029 /* Return true if using lea INSN has a performance advantage
18030 over a sequence of instructions. The instruction sequence has
18031 SPLIT_COST cycles higher latency than the lea latency. */
18033 static bool
18034 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18035 unsigned int regno2, int split_cost, bool has_scale)
18037 int dist_define, dist_use;
18039 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18040 non-destructive destination, or because the ability to use
18041 SCALE is wanted, the use of LEA is justified. */
18042 if (TARGET_SILVERMONT || TARGET_INTEL)
18044 if (has_scale)
18045 return true;
18046 if (split_cost < 1)
18047 return false;
18048 if (regno0 == regno1 || regno0 == regno2)
18049 return false;
18050 return true;
18053 dist_define = distance_non_agu_define (regno1, regno2, insn);
18054 dist_use = distance_agu_use (regno0, insn);
18056 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18058 /* If there is no non-AGU operand definition, no AGU
18059 operand usage and the split cost is 0, then both the lea
18060 and non-lea variants have the same priority. Currently
18061 we prefer lea for 64-bit code and non-lea for 32-bit
18062 code. */
18063 if (dist_use < 0 && split_cost == 0)
18064 return TARGET_64BIT || IX86_LEA_PRIORITY;
18065 else
18066 return true;
18069 /* With a longer definition distance, lea is preferable.
18070 Here we adjust the distance to take the splitting cost and
18071 lea priority into account. */
18072 dist_define += split_cost + IX86_LEA_PRIORITY;
18074 /* If there is no use in a memory address then we just check
18075 that the split cost exceeds the AGU stall. */
18076 if (dist_use < 0)
18077 return dist_define > LEA_MAX_STALL;
18079 /* If this insn has both a backward non-agu dependence and a forward
18080 agu dependence, the one with the shorter distance takes effect. */
18081 return dist_define >= dist_use;
18084 /* Return true if it is legal to clobber flags by INSN and
18085 false otherwise. */
18087 static bool
18088 ix86_ok_to_clobber_flags (rtx insn)
18090 basic_block bb = BLOCK_FOR_INSN (insn);
18091 df_ref *use;
18092 bitmap live;
18094 while (insn)
18096 if (NONDEBUG_INSN_P (insn))
18098 for (use = DF_INSN_USES (insn); *use; use++)
18099 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18100 return false;
18102 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18103 return true;
18106 if (insn == BB_END (bb))
18107 break;
18109 insn = NEXT_INSN (insn);
18112 live = df_get_live_out(bb);
18113 return !REGNO_REG_SET_P (live, FLAGS_REG);
18116 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18117 move and add to avoid AGU stalls. */
18119 bool
18120 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18122 unsigned int regno0, regno1, regno2;
18124 /* Check if we need to optimize. */
18125 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18126 return false;
18128 /* Check it is correct to split here. */
18129 if (!ix86_ok_to_clobber_flags(insn))
18130 return false;
18132 regno0 = true_regnum (operands[0]);
18133 regno1 = true_regnum (operands[1]);
18134 regno2 = true_regnum (operands[2]);
18136 /* We need to split only adds with a non-destructive
18137 destination operand. */
18138 if (regno0 == regno1 || regno0 == regno2)
18139 return false;
18140 else
18141 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18144 /* Return true if we should emit lea instruction instead of mov
18145 instruction. */
18147 bool
18148 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18150 unsigned int regno0, regno1;
18152 /* Check if we need to optimize. */
18153 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18154 return false;
18156 /* Use lea for reg to reg moves only. */
18157 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18158 return false;
18160 regno0 = true_regnum (operands[0]);
18161 regno1 = true_regnum (operands[1]);
18163 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18166 /* Return true if we need to split lea into a sequence of
18167 instructions to avoid AGU stalls. */
18169 bool
18170 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18172 unsigned int regno0, regno1, regno2;
18173 int split_cost;
18174 struct ix86_address parts;
18175 int ok;
18177 /* Check we need to optimize. */
18178 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18179 return false;
18181 /* The "at least two components" test below might not catch simple
18182 move or zero extension insns if parts.base is non-NULL and parts.disp
18183 is const0_rtx as the only components in the address, e.g. if the
18184 register is %rbp or %r13. As this test is much cheaper and moves or
18185 zero extensions are the common case, do this check first. */
18186 if (REG_P (operands[1])
18187 || (SImode_address_operand (operands[1], VOIDmode)
18188 && REG_P (XEXP (operands[1], 0))))
18189 return false;
18191 /* Check if it is OK to split here. */
18192 if (!ix86_ok_to_clobber_flags (insn))
18193 return false;
18195 ok = ix86_decompose_address (operands[1], &parts);
18196 gcc_assert (ok);
18198 /* There should be at least two components in the address. */
18199 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18200 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18201 return false;
18203 /* We should not split into an add if a non-legitimate PIC
18204 operand is used as the displacement. */
18205 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18206 return false;
18208 regno0 = true_regnum (operands[0]) ;
18209 regno1 = INVALID_REGNUM;
18210 regno2 = INVALID_REGNUM;
18212 if (parts.base)
18213 regno1 = true_regnum (parts.base);
18214 if (parts.index)
18215 regno2 = true_regnum (parts.index);
18217 split_cost = 0;
18219 /* Compute how many cycles we will add to the execution time
18220 if we split the lea into a sequence of instructions. */
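/* E.g. for "lea 4(%rbx,%rcx,2), %rax" the split needs a mov, a shift,
   an add of the base and an add of the displacement; after subtracting
   the lea itself, split_cost is 1 + 1 + 1 + 1 - 1 = 3.  */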
18221 if (parts.base || parts.index)
18223 /* Have to use a mov instruction if the non-destructive
18224 destination form is used. */
18225 if (regno1 != regno0 && regno2 != regno0)
18226 split_cost += 1;
18228 /* Have to add index to base if both exist. */
18229 if (parts.base && parts.index)
18230 split_cost += 1;
18232 /* Have to use shift and adds if scale is 2 or greater. */
18233 if (parts.scale > 1)
18235 if (regno0 != regno1)
18236 split_cost += 1;
18237 else if (regno2 == regno0)
18238 split_cost += 4;
18239 else
18240 split_cost += parts.scale;
18243 /* Have to use an add instruction with an immediate if
18244 disp is nonzero. */
18245 if (parts.disp && parts.disp != const0_rtx)
18246 split_cost += 1;
18248 /* Subtract the price of lea. */
18249 split_cost -= 1;
18252 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18253 parts.scale > 1);
18256 /* Emit the x86 binary operator CODE in mode MODE, where the first operand
18257 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18259 static void
18260 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18261 rtx dst, rtx src)
18263 rtx op, clob;
18265 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18266 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18271 /* Return true if regno1 def is nearest to the insn. */
18273 static bool
18274 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18276 rtx prev = insn;
18277 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18279 if (insn == start)
18280 return false;
18281 while (prev && prev != start)
18283 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18285 prev = PREV_INSN (prev);
18286 continue;
18288 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18289 return true;
18290 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18291 return false;
18292 prev = PREV_INSN (prev);
18295 /* None of the regs is defined in the bb. */
18296 return false;
18299 /* Split lea instructions into a sequence of instructions
18300 which are executed on the ALU to avoid AGU stalls.
18301 It is assumed that it is allowed to clobber the flags register
18302 at the lea position. */
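/* E.g. "lea 4(%rbx,%rcx,2), %rax" is split into roughly
     movq %rcx, %rax
     salq $1, %rax
     addq %rbx, %rax
     addq $4, %rax  */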
18304 void
18305 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18307 unsigned int regno0, regno1, regno2;
18308 struct ix86_address parts;
18309 rtx target, tmp;
18310 int ok, adds;
18312 ok = ix86_decompose_address (operands[1], &parts);
18313 gcc_assert (ok);
18315 target = gen_lowpart (mode, operands[0]);
18317 regno0 = true_regnum (target);
18318 regno1 = INVALID_REGNUM;
18319 regno2 = INVALID_REGNUM;
18321 if (parts.base)
18323 parts.base = gen_lowpart (mode, parts.base);
18324 regno1 = true_regnum (parts.base);
18327 if (parts.index)
18329 parts.index = gen_lowpart (mode, parts.index);
18330 regno2 = true_regnum (parts.index);
18333 if (parts.disp)
18334 parts.disp = gen_lowpart (mode, parts.disp);
18336 if (parts.scale > 1)
18338 /* Case r1 = r1 + ... */
18339 if (regno1 == regno0)
18341 /* If we have a case r1 = r1 + C * r2 then we
18342 should use multiplication which is very
18343 expensive. Assume cost model is wrong if we
18344 have such case here. */
18345 gcc_assert (regno2 != regno0);
18347 for (adds = parts.scale; adds > 0; adds--)
18348 ix86_emit_binop (PLUS, mode, target, parts.index);
18350 else
18352 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18353 if (regno0 != regno2)
18354 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18356 /* Use shift for scaling. */
18357 ix86_emit_binop (ASHIFT, mode, target,
18358 GEN_INT (exact_log2 (parts.scale)));
18360 if (parts.base)
18361 ix86_emit_binop (PLUS, mode, target, parts.base);
18363 if (parts.disp && parts.disp != const0_rtx)
18364 ix86_emit_binop (PLUS, mode, target, parts.disp);
18367 else if (!parts.base && !parts.index)
18369 gcc_assert(parts.disp);
18370 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18372 else
18374 if (!parts.base)
18376 if (regno0 != regno2)
18377 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18379 else if (!parts.index)
18381 if (regno0 != regno1)
18382 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18384 else
18386 if (regno0 == regno1)
18387 tmp = parts.index;
18388 else if (regno0 == regno2)
18389 tmp = parts.base;
18390 else
18392 rtx tmp1;
18394 /* Find the better operand for the SET instruction, depending
18395 on which definition is farther from the insn. */
18396 if (find_nearest_reg_def (insn, regno1, regno2))
18397 tmp = parts.index, tmp1 = parts.base;
18398 else
18399 tmp = parts.base, tmp1 = parts.index;
18401 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18403 if (parts.disp && parts.disp != const0_rtx)
18404 ix86_emit_binop (PLUS, mode, target, parts.disp);
18406 ix86_emit_binop (PLUS, mode, target, tmp1);
18407 return;
18410 ix86_emit_binop (PLUS, mode, target, tmp);
18413 if (parts.disp && parts.disp != const0_rtx)
18414 ix86_emit_binop (PLUS, mode, target, parts.disp);
18418 /* Return true if it is ok to optimize an ADD operation to an LEA
18419 operation to avoid flag register consumption. For most processors,
18420 ADD is faster than LEA. For processors like BONNELL, if the
18421 destination register of the LEA holds an actual address which will be
18422 used soon, LEA is better; otherwise ADD is better. */
18424 bool
18425 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18427 unsigned int regno0 = true_regnum (operands[0]);
18428 unsigned int regno1 = true_regnum (operands[1]);
18429 unsigned int regno2 = true_regnum (operands[2]);
18431 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18432 if (regno0 != regno1 && regno0 != regno2)
18433 return true;
18435 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18436 return false;
18438 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18441 /* Return true if the destination reg of SET_BODY is the shift count of
18442 USE_BODY. */
18444 static bool
18445 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18447 rtx set_dest;
18448 rtx shift_rtx;
18449 int i;
18451 /* Retrieve destination of SET_BODY. */
18452 switch (GET_CODE (set_body))
18454 case SET:
18455 set_dest = SET_DEST (set_body);
18456 if (!set_dest || !REG_P (set_dest))
18457 return false;
18458 break;
18459 case PARALLEL:
18460 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18461 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18462 use_body))
18463 return true;
18464 default:
18465 return false;
18466 break;
18469 /* Retrieve shift count of USE_BODY. */
18470 switch (GET_CODE (use_body))
18472 case SET:
18473 shift_rtx = XEXP (use_body, 1);
18474 break;
18475 case PARALLEL:
18476 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18477 if (ix86_dep_by_shift_count_body (set_body,
18478 XVECEXP (use_body, 0, i)))
18479 return true;
18480 default:
18481 return false;
18482 break;
18485 if (shift_rtx
18486 && (GET_CODE (shift_rtx) == ASHIFT
18487 || GET_CODE (shift_rtx) == LSHIFTRT
18488 || GET_CODE (shift_rtx) == ASHIFTRT
18489 || GET_CODE (shift_rtx) == ROTATE
18490 || GET_CODE (shift_rtx) == ROTATERT))
18492 rtx shift_count = XEXP (shift_rtx, 1);
18494 /* Return true if shift count is dest of SET_BODY. */
18495 if (REG_P (shift_count))
18497 /* Add a check since this can be invoked before register
18498 allocation by the pre-reload scheduler. */
18499 if (reload_completed
18500 && true_regnum (set_dest) == true_regnum (shift_count))
18501 return true;
18502 else if (REGNO(set_dest) == REGNO(shift_count))
18503 return true;
18507 return false;
18510 /* Return true if destination reg of SET_INSN is shift count of
18511 USE_INSN. */
18513 bool
18514 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18516 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18517 PATTERN (use_insn));
18520 /* Return TRUE or FALSE depending on whether the unary operator meets the
18521 appropriate constraints. */
18523 bool
18524 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18525 enum machine_mode mode ATTRIBUTE_UNUSED,
18526 rtx operands[2])
18528 /* If one of operands is memory, source and destination must match. */
18529 if ((MEM_P (operands[0])
18530 || MEM_P (operands[1]))
18531 && ! rtx_equal_p (operands[0], operands[1]))
18532 return false;
18533 return true;
18536 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18537 are ok, keeping in mind the possible movddup alternative. */
18539 bool
18540 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18542 if (MEM_P (operands[0]))
18543 return rtx_equal_p (operands[0], operands[1 + high]);
18544 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18545 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18546 return true;
18549 /* Post-reload splitter for converting an SFmode or DFmode value in an
18550 SSE register into an unsigned SImode value. */
18552 void
18553 ix86_split_convert_uns_si_sse (rtx operands[])
18555 enum machine_mode vecmode;
18556 rtx value, large, zero_or_two31, input, two31, x;
18558 large = operands[1];
18559 zero_or_two31 = operands[2];
18560 input = operands[3];
18561 two31 = operands[4];
18562 vecmode = GET_MODE (large);
18563 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18565 /* Load up the value into the low element. We must ensure that the other
18566 elements are valid floats -- zero is the easiest such value. */
18567 if (MEM_P (input))
18569 if (vecmode == V4SFmode)
18570 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18571 else
18572 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18574 else
18576 input = gen_rtx_REG (vecmode, REGNO (input));
18577 emit_move_insn (value, CONST0_RTX (vecmode));
18578 if (vecmode == V4SFmode)
18579 emit_insn (gen_sse_movss (value, value, input));
18580 else
18581 emit_insn (gen_sse2_movsd (value, value, input));
18584 emit_move_insn (large, two31);
18585 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18587 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18588 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18590 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18591 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18593 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18594 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18596 large = gen_rtx_REG (V4SImode, REGNO (large));
18597 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18599 x = gen_rtx_REG (V4SImode, REGNO (value));
18600 if (vecmode == V4SFmode)
18601 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18602 else
18603 emit_insn (gen_sse2_cvttpd2dq (x, value));
18604 value = x;
18606 emit_insn (gen_xorv4si3 (value, value, large));
18609 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18610 Expects the 64-bit DImode to be supplied in a pair of integral
18611 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18612 -mfpmath=sse, !optimize_size only. */
18614 void
18615 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18617 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18618 rtx int_xmm, fp_xmm;
18619 rtx biases, exponents;
18620 rtx x;
18622 int_xmm = gen_reg_rtx (V4SImode);
18623 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18624 emit_insn (gen_movdi_to_sse (int_xmm, input));
18625 else if (TARGET_SSE_SPLIT_REGS)
18627 emit_clobber (int_xmm);
18628 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18630 else
18632 x = gen_reg_rtx (V2DImode);
18633 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18634 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18637 x = gen_rtx_CONST_VECTOR (V4SImode,
18638 gen_rtvec (4, GEN_INT (0x43300000UL),
18639 GEN_INT (0x45300000UL),
18640 const0_rtx, const0_rtx));
18641 exponents = validize_mem (force_const_mem (V4SImode, x));
18643 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18644 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18646 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
18647 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18648 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18649 (0x1.0p84 + double(fp_value_hi_xmm)).
18650 Note these exponents differ by 32. */
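/* E.g. for the input 2**32 + 5 (hi = 1, lo = 5) the two doubles are
   0x1.0p52 + 5 and 0x1.0p84 + 0x1.0p32; after the bias subtraction
   they become 5.0 and 0x1.0p32, and the final add yields
   4294967301.0.  */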
18652 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18654 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18655 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18656 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18657 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18658 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18659 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18660 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18661 biases = validize_mem (force_const_mem (V2DFmode, biases));
18662 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18664 /* Add the upper and lower DFmode values together. */
18665 if (TARGET_SSE3)
18666 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18667 else
18669 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18670 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18671 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18674 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18677 /* Not used, but eases macroization of patterns. */
18678 void
18679 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18680 rtx input ATTRIBUTE_UNUSED)
18682 gcc_unreachable ();
18685 /* Convert an unsigned SImode value into a DFmode. Only currently used
18686 for SSE, but applicable anywhere. */
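/* Sketch of the idea, added for clarity: the PLUS below flips the sign bit,
   so as a signed SImode value x equals input - 2^31 for every input; after
   the signed int-to-double conversion, adding 0x1.0p31 back recovers the
   unsigned value exactly.  E.g. input 0xFFFFFFFF gives x = 0x7FFFFFFF =
   2147483647, and 2147483647.0 + 2147483648.0 = 4294967295.0.  */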
18688 void
18689 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18691 REAL_VALUE_TYPE TWO31r;
18692 rtx x, fp;
18694 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18695 NULL, 1, OPTAB_DIRECT);
18697 fp = gen_reg_rtx (DFmode);
18698 emit_insn (gen_floatsidf2 (fp, x));
18700 real_ldexp (&TWO31r, &dconst1, 31);
18701 x = const_double_from_real_value (TWO31r, DFmode);
18703 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18704 if (x != target)
18705 emit_move_insn (target, x);
18708 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18709 32-bit mode; otherwise we have a direct convert instruction. */
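/* Sketch, added for clarity: the 64-bit value is computed as
   (double) (signed) hi * 0x1.0p32 + (double) (unsigned) lo, converting the
   high word as signed (it carries the sign) and the low word as unsigned
   via ix86_expand_convert_uns_sidf_sse.  E.g. -3 = 0xFFFFFFFFFFFFFFFD
   gives -1 * 2^32 + 4294967293 = -3.  */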
18711 void
18712 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18714 REAL_VALUE_TYPE TWO32r;
18715 rtx fp_lo, fp_hi, x;
18717 fp_lo = gen_reg_rtx (DFmode);
18718 fp_hi = gen_reg_rtx (DFmode);
18720 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18722 real_ldexp (&TWO32r, &dconst1, 32);
18723 x = const_double_from_real_value (TWO32r, DFmode);
18724 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18726 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18728 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18729 0, OPTAB_DIRECT);
18730 if (x != target)
18731 emit_move_insn (target, x);
18734 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18735 For x86_32, -mfpmath=sse, !optimize_size only. */
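/* Sketch, added for clarity: the 32-bit value is split into 16-bit halves,
   each of which converts to SFmode exactly; the result is computed as
   (float) (input >> 16) * 65536.0f + (float) (input & 0xffff),
   e.g. 0x00012345 -> 1.0f * 65536.0f + 9029.0f = 74565.0f.  */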
18736 void
18737 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18739 REAL_VALUE_TYPE ONE16r;
18740 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18742 real_ldexp (&ONE16r, &dconst1, 16);
18743 x = const_double_from_real_value (ONE16r, SFmode);
18744 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18745 NULL, 0, OPTAB_DIRECT);
18746 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18747 NULL, 0, OPTAB_DIRECT);
18748 fp_hi = gen_reg_rtx (SFmode);
18749 fp_lo = gen_reg_rtx (SFmode);
18750 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18751 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18752 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18753 0, OPTAB_DIRECT);
18754 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18755 0, OPTAB_DIRECT);
18756 if (!rtx_equal_p (target, fp_hi))
18757 emit_move_insn (target, fp_hi);
18760 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18761 a vector of unsigned ints VAL to vector of floats TARGET. */
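/* Added note: this is the vector analogue of ix86_expand_convert_uns_sisf_sse
   above -- each element is computed as (val >> 16) * 65536.0f + (val & 0xffff),
   with both halves going through the signed vector conversion pattern.  */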
18763 void
18764 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18766 rtx tmp[8];
18767 REAL_VALUE_TYPE TWO16r;
18768 enum machine_mode intmode = GET_MODE (val);
18769 enum machine_mode fltmode = GET_MODE (target);
18770 rtx (*cvt) (rtx, rtx);
18772 if (intmode == V4SImode)
18773 cvt = gen_floatv4siv4sf2;
18774 else
18775 cvt = gen_floatv8siv8sf2;
18776 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18777 tmp[0] = force_reg (intmode, tmp[0]);
18778 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18779 OPTAB_DIRECT);
18780 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18781 NULL_RTX, 1, OPTAB_DIRECT);
18782 tmp[3] = gen_reg_rtx (fltmode);
18783 emit_insn (cvt (tmp[3], tmp[1]));
18784 tmp[4] = gen_reg_rtx (fltmode);
18785 emit_insn (cvt (tmp[4], tmp[2]));
18786 real_ldexp (&TWO16r, &dconst1, 16);
18787 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18788 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18789 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18790 OPTAB_DIRECT);
18791 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18792 OPTAB_DIRECT);
18793 if (tmp[7] != target)
18794 emit_move_insn (target, tmp[7]);
18797 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18798 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18799 This is done by doing just a signed conversion if < 0x1p31, and otherwise by
18800 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
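/* Worked example, added for clarity: for a lane holding 3000000000.0
   (>= 0x1p31) the code below produces 3000000000.0 - 0x1p31 = 852516352.0,
   which the signed fix_trunc pattern can handle, and sets 0x80000000 in the
   corresponding lane of *XORP so the caller can XOR the bias back into the
   integer result; lanes below 0x1p31 are left unchanged with a zero mask.  */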
18803 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18805 REAL_VALUE_TYPE TWO31r;
18806 rtx two31r, tmp[4];
18807 enum machine_mode mode = GET_MODE (val);
18808 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18809 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18810 rtx (*cmp) (rtx, rtx, rtx, rtx);
18811 int i;
18813 for (i = 0; i < 3; i++)
18814 tmp[i] = gen_reg_rtx (mode);
18815 real_ldexp (&TWO31r, &dconst1, 31);
18816 two31r = const_double_from_real_value (TWO31r, scalarmode);
18817 two31r = ix86_build_const_vector (mode, 1, two31r);
18818 two31r = force_reg (mode, two31r);
18819 switch (mode)
18821 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18822 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18823 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18824 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18825 default: gcc_unreachable ();
18827 tmp[3] = gen_rtx_LE (mode, two31r, val);
18828 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18829 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18830 0, OPTAB_DIRECT);
18831 if (intmode == V4SImode || TARGET_AVX2)
18832 *xorp = expand_simple_binop (intmode, ASHIFT,
18833 gen_lowpart (intmode, tmp[0]),
18834 GEN_INT (31), NULL_RTX, 0,
18835 OPTAB_DIRECT);
18836 else
18838 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18839 two31 = ix86_build_const_vector (intmode, 1, two31);
18840 *xorp = expand_simple_binop (intmode, AND,
18841 gen_lowpart (intmode, tmp[0]),
18842 two31, NULL_RTX, 0,
18843 OPTAB_DIRECT);
18845 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18846 0, OPTAB_DIRECT);
18849 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18850 then replicate the value for all elements of the vector
18851 register. */
18854 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18856 int i, n_elt;
18857 rtvec v;
18858 enum machine_mode scalar_mode;
18860 switch (mode)
18862 case V64QImode:
18863 case V32QImode:
18864 case V16QImode:
18865 case V32HImode:
18866 case V16HImode:
18867 case V8HImode:
18868 case V16SImode:
18869 case V8SImode:
18870 case V4SImode:
18871 case V8DImode:
18872 case V4DImode:
18873 case V2DImode:
18874 gcc_assert (vect);
18875 case V16SFmode:
18876 case V8SFmode:
18877 case V4SFmode:
18878 case V8DFmode:
18879 case V4DFmode:
18880 case V2DFmode:
18881 n_elt = GET_MODE_NUNITS (mode);
18882 v = rtvec_alloc (n_elt);
18883 scalar_mode = GET_MODE_INNER (mode);
18885 RTVEC_ELT (v, 0) = value;
18887 for (i = 1; i < n_elt; ++i)
18888 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18890 return gen_rtx_CONST_VECTOR (mode, v);
18892 default:
18893 gcc_unreachable ();
18897 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18898 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18899 for an SSE register. If VECT is true, then replicate the mask for
18900 all elements of the vector register. If INVERT is true, then create
18901 a mask excluding the sign bit. */
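/* Example, added for clarity: for V4SFmode with VECT true and INVERT false
   this returns a register holding 0x80000000 in every lane (just the sign
   bits), and with INVERT true 0x7fffffff in every lane; with VECT false only
   the low lane carries the mask, which is what the scalar SFmode/DFmode
   absneg and copysign expanders use.  */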
18904 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18906 enum machine_mode vec_mode, imode;
18907 HOST_WIDE_INT hi, lo;
18908 int shift = 63;
18909 rtx v;
18910 rtx mask;
18912 /* Find the sign bit, sign extended to 2*HWI. */
18913 switch (mode)
18915 case V16SImode:
18916 case V16SFmode:
18917 case V8SImode:
18918 case V4SImode:
18919 case V8SFmode:
18920 case V4SFmode:
18921 vec_mode = mode;
18922 mode = GET_MODE_INNER (mode);
18923 imode = SImode;
18924 lo = 0x80000000, hi = lo < 0;
18925 break;
18927 case V8DImode:
18928 case V4DImode:
18929 case V2DImode:
18930 case V8DFmode:
18931 case V4DFmode:
18932 case V2DFmode:
18933 vec_mode = mode;
18934 mode = GET_MODE_INNER (mode);
18935 imode = DImode;
18936 if (HOST_BITS_PER_WIDE_INT >= 64)
18937 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18938 else
18939 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18940 break;
18942 case TImode:
18943 case TFmode:
18944 vec_mode = VOIDmode;
18945 if (HOST_BITS_PER_WIDE_INT >= 64)
18947 imode = TImode;
18948 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18950 else
18952 rtvec vec;
18954 imode = DImode;
18955 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18957 if (invert)
18959 lo = ~lo, hi = ~hi;
18960 v = constm1_rtx;
18962 else
18963 v = const0_rtx;
18965 mask = immed_double_const (lo, hi, imode);
18967 vec = gen_rtvec (2, v, mask);
18968 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18969 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18971 return v;
18973 break;
18975 default:
18976 gcc_unreachable ();
18979 if (invert)
18980 lo = ~lo, hi = ~hi;
18982 /* Force this value into the low part of a fp vector constant. */
18983 mask = immed_double_const (lo, hi, imode);
18984 mask = gen_lowpart (mode, mask);
18986 if (vec_mode == VOIDmode)
18987 return force_reg (mode, mask);
18989 v = ix86_build_const_vector (vec_mode, vect, mask);
18990 return force_reg (vec_mode, v);
18993 /* Generate code for floating point ABS or NEG. */
18995 void
18996 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
18997 rtx operands[])
18999 rtx mask, set, dst, src;
19000 bool use_sse = false;
19001 bool vector_mode = VECTOR_MODE_P (mode);
19002 enum machine_mode vmode = mode;
19004 if (vector_mode)
19005 use_sse = true;
19006 else if (mode == TFmode)
19007 use_sse = true;
19008 else if (TARGET_SSE_MATH)
19010 use_sse = SSE_FLOAT_MODE_P (mode);
19011 if (mode == SFmode)
19012 vmode = V4SFmode;
19013 else if (mode == DFmode)
19014 vmode = V2DFmode;
19017 /* NEG and ABS performed with SSE use bitwise mask operations.
19018 Create the appropriate mask now. */
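/* In other words (comment added for clarity): NEG is implemented as
   x ^ sign-bit-mask and ABS as x & ~sign-bit-mask, so INVERT is passed as
   true for ABS to build the mask with the sign bit cleared.  */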
19019 if (use_sse)
19020 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19021 else
19022 mask = NULL_RTX;
19024 dst = operands[0];
19025 src = operands[1];
19027 set = gen_rtx_fmt_e (code, mode, src);
19028 set = gen_rtx_SET (VOIDmode, dst, set);
19030 if (mask)
19032 rtx use, clob;
19033 rtvec par;
19035 use = gen_rtx_USE (VOIDmode, mask);
19036 if (vector_mode)
19037 par = gen_rtvec (2, set, use);
19038 else
19040 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19041 par = gen_rtvec (3, set, use, clob);
19043 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19045 else
19046 emit_insn (set);
19049 /* Expand a copysign operation. Special case operand 0 being a constant. */
19051 void
19052 ix86_expand_copysign (rtx operands[])
19054 enum machine_mode mode, vmode;
19055 rtx dest, op0, op1, mask, nmask;
19057 dest = operands[0];
19058 op0 = operands[1];
19059 op1 = operands[2];
19061 mode = GET_MODE (dest);
19063 if (mode == SFmode)
19064 vmode = V4SFmode;
19065 else if (mode == DFmode)
19066 vmode = V2DFmode;
19067 else
19068 vmode = mode;
19070 if (GET_CODE (op0) == CONST_DOUBLE)
19072 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19074 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19075 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19077 if (mode == SFmode || mode == DFmode)
19079 if (op0 == CONST0_RTX (mode))
19080 op0 = CONST0_RTX (vmode);
19081 else
19083 rtx v = ix86_build_const_vector (vmode, false, op0);
19085 op0 = force_reg (vmode, v);
19088 else if (op0 != CONST0_RTX (mode))
19089 op0 = force_reg (mode, op0);
19091 mask = ix86_build_signbit_mask (vmode, 0, 0);
19093 if (mode == SFmode)
19094 copysign_insn = gen_copysignsf3_const;
19095 else if (mode == DFmode)
19096 copysign_insn = gen_copysigndf3_const;
19097 else
19098 copysign_insn = gen_copysigntf3_const;
19100 emit_insn (copysign_insn (dest, op0, op1, mask));
19102 else
19104 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19106 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19107 mask = ix86_build_signbit_mask (vmode, 0, 0);
19109 if (mode == SFmode)
19110 copysign_insn = gen_copysignsf3_var;
19111 else if (mode == DFmode)
19112 copysign_insn = gen_copysigndf3_var;
19113 else
19114 copysign_insn = gen_copysigntf3_var;
19116 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19120 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19121 be a constant, and so has already been expanded into a vector constant. */
19123 void
19124 ix86_split_copysign_const (rtx operands[])
19126 enum machine_mode mode, vmode;
19127 rtx dest, op0, mask, x;
19129 dest = operands[0];
19130 op0 = operands[1];
19131 mask = operands[3];
19133 mode = GET_MODE (dest);
19134 vmode = GET_MODE (mask);
19136 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19137 x = gen_rtx_AND (vmode, dest, mask);
19138 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19140 if (op0 != CONST0_RTX (vmode))
19142 x = gen_rtx_IOR (vmode, dest, op0);
19143 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19147 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19148 so we have to do two masks. */
19150 void
19151 ix86_split_copysign_var (rtx operands[])
19153 enum machine_mode mode, vmode;
19154 rtx dest, scratch, op0, op1, mask, nmask, x;
19156 dest = operands[0];
19157 scratch = operands[1];
19158 op0 = operands[2];
19159 op1 = operands[3];
19160 nmask = operands[4];
19161 mask = operands[5];
19163 mode = GET_MODE (dest);
19164 vmode = GET_MODE (mask);
19166 if (rtx_equal_p (op0, op1))
19168 /* Shouldn't happen often (it's useless, obviously), but when it does
19169 we'd generate incorrect code if we continue below. */
19170 emit_move_insn (dest, op0);
19171 return;
19174 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19176 gcc_assert (REGNO (op1) == REGNO (scratch));
19178 x = gen_rtx_AND (vmode, scratch, mask);
19179 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19181 dest = mask;
19182 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19183 x = gen_rtx_NOT (vmode, dest);
19184 x = gen_rtx_AND (vmode, x, op0);
19185 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19187 else
19189 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19191 x = gen_rtx_AND (vmode, scratch, mask);
19193 else /* alternative 2,4 */
19195 gcc_assert (REGNO (mask) == REGNO (scratch));
19196 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19197 x = gen_rtx_AND (vmode, scratch, op1);
19199 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19201 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19203 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19204 x = gen_rtx_AND (vmode, dest, nmask);
19206 else /* alternative 3,4 */
19208 gcc_assert (REGNO (nmask) == REGNO (dest));
19209 dest = nmask;
19210 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19211 x = gen_rtx_AND (vmode, dest, op0);
19213 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19216 x = gen_rtx_IOR (vmode, dest, scratch);
19217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19220 /* Return TRUE or FALSE depending on whether the first SET in INSN
19221 has source and destination with matching CC modes and whether the
19222 CC mode is at least as constrained as REQ_MODE. */
19224 bool
19225 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19227 rtx set;
19228 enum machine_mode set_mode;
19230 set = PATTERN (insn);
19231 if (GET_CODE (set) == PARALLEL)
19232 set = XVECEXP (set, 0, 0);
19233 gcc_assert (GET_CODE (set) == SET);
19234 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19236 set_mode = GET_MODE (SET_DEST (set));
19237 switch (set_mode)
19239 case CCNOmode:
19240 if (req_mode != CCNOmode
19241 && (req_mode != CCmode
19242 || XEXP (SET_SRC (set), 1) != const0_rtx))
19243 return false;
19244 break;
19245 case CCmode:
19246 if (req_mode == CCGCmode)
19247 return false;
19248 /* FALLTHRU */
19249 case CCGCmode:
19250 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19251 return false;
19252 /* FALLTHRU */
19253 case CCGOCmode:
19254 if (req_mode == CCZmode)
19255 return false;
19256 /* FALLTHRU */
19257 case CCZmode:
19258 break;
19260 case CCAmode:
19261 case CCCmode:
19262 case CCOmode:
19263 case CCSmode:
19264 if (set_mode != req_mode)
19265 return false;
19266 break;
19268 default:
19269 gcc_unreachable ();
19272 return GET_MODE (SET_SRC (set)) == set_mode;
19275 /* Generate insn patterns to do an integer compare of OPERANDS. */
19277 static rtx
19278 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19280 enum machine_mode cmpmode;
19281 rtx tmp, flags;
19283 cmpmode = SELECT_CC_MODE (code, op0, op1);
19284 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19286 /* This is very simple, but making the interface the same as in the
19287 FP case makes the rest of the code easier. */
19288 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19289 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19291 /* Return the test that should be put into the flags user, i.e.
19292 the bcc, scc, or cmov instruction. */
19293 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19296 /* Figure out whether to use ordered or unordered fp comparisons.
19297 Return the appropriate mode to use. */
19299 enum machine_mode
19300 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19302 /* ??? In order to make all comparisons reversible, we do all comparisons
19303 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19304 between the trapping and nontrapping forms of comparisons, we can make inequality
19305 comparisons trapping again, since that results in better code when using
19306 FCOM based compares. */
19307 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19310 enum machine_mode
19311 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19313 enum machine_mode mode = GET_MODE (op0);
19315 if (SCALAR_FLOAT_MODE_P (mode))
19317 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19318 return ix86_fp_compare_mode (code);
19321 switch (code)
19323 /* Only zero flag is needed. */
19324 case EQ: /* ZF=0 */
19325 case NE: /* ZF!=0 */
19326 return CCZmode;
19327 /* Codes needing carry flag. */
19328 case GEU: /* CF=0 */
19329 case LTU: /* CF=1 */
19330 /* Detect overflow checks. They need just the carry flag. */
19331 if (GET_CODE (op0) == PLUS
19332 && rtx_equal_p (op1, XEXP (op0, 0)))
19333 return CCCmode;
19334 else
19335 return CCmode;
19336 case GTU: /* CF=0 & ZF=0 */
19337 case LEU: /* CF=1 | ZF=1 */
19338 return CCmode;
19339 /* Codes possibly doable only with sign flag when
19340 comparing against zero. */
19341 case GE: /* SF=OF or SF=0 */
19342 case LT: /* SF<>OF or SF=1 */
19343 if (op1 == const0_rtx)
19344 return CCGOCmode;
19345 else
19346 /* For other cases Carry flag is not required. */
19347 return CCGCmode;
19348 /* Codes doable only with the sign flag when comparing
19349 against zero, but we lack a jump instruction for it,
19350 so we need to use relational tests against the overflow flag,
19351 which thus needs to be zero. */
19352 case GT: /* ZF=0 & SF=OF */
19353 case LE: /* ZF=1 | SF<>OF */
19354 if (op1 == const0_rtx)
19355 return CCNOmode;
19356 else
19357 return CCGCmode;
19358 /* The strcmp pattern does a (use flags) and combine may ask us for the proper
19359 mode. */
19360 case USE:
19361 return CCmode;
19362 default:
19363 gcc_unreachable ();
19367 /* Return the fixed registers used for condition codes. */
19369 static bool
19370 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19372 *p1 = FLAGS_REG;
19373 *p2 = FPSR_REG;
19374 return true;
19377 /* If two condition code modes are compatible, return a condition code
19378 mode which is compatible with both. Otherwise, return
19379 VOIDmode. */
19381 static enum machine_mode
19382 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19384 if (m1 == m2)
19385 return m1;
19387 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19388 return VOIDmode;
19390 if ((m1 == CCGCmode && m2 == CCGOCmode)
19391 || (m1 == CCGOCmode && m2 == CCGCmode))
19392 return CCGCmode;
19394 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19395 return m2;
19396 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19397 return m1;
19399 switch (m1)
19401 default:
19402 gcc_unreachable ();
19404 case CCmode:
19405 case CCGCmode:
19406 case CCGOCmode:
19407 case CCNOmode:
19408 case CCAmode:
19409 case CCCmode:
19410 case CCOmode:
19411 case CCSmode:
19412 case CCZmode:
19413 switch (m2)
19415 default:
19416 return VOIDmode;
19418 case CCmode:
19419 case CCGCmode:
19420 case CCGOCmode:
19421 case CCNOmode:
19422 case CCAmode:
19423 case CCCmode:
19424 case CCOmode:
19425 case CCSmode:
19426 case CCZmode:
19427 return CCmode;
19430 case CCFPmode:
19431 case CCFPUmode:
19432 /* These are only compatible with themselves, which we already
19433 checked above. */
19434 return VOIDmode;
19439 /* Return a comparison we can do that is equivalent to
19440 swap_condition (code), apart possibly from orderedness.
19441 But, never change orderedness if TARGET_IEEE_FP, returning
19442 UNKNOWN in that case if necessary. */
19444 static enum rtx_code
19445 ix86_fp_swap_condition (enum rtx_code code)
19447 switch (code)
19449 case GT: /* GTU - CF=0 & ZF=0 */
19450 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19451 case GE: /* GEU - CF=0 */
19452 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19453 case UNLT: /* LTU - CF=1 */
19454 return TARGET_IEEE_FP ? UNKNOWN : GT;
19455 case UNLE: /* LEU - CF=1 | ZF=1 */
19456 return TARGET_IEEE_FP ? UNKNOWN : GE;
19457 default:
19458 return swap_condition (code);
19462 /* Return cost of comparison CODE using the best strategy for performance.
19463 All following functions use the number of instructions as the cost metric.
19464 In the future this should be tweaked to compute bytes for optimize_size and
19465 take into account the performance of various instructions on various CPUs. */
19467 static int
19468 ix86_fp_comparison_cost (enum rtx_code code)
19470 int arith_cost;
19472 /* The cost of code using bit-twiddling on %ah. */
19473 switch (code)
19475 case UNLE:
19476 case UNLT:
19477 case LTGT:
19478 case GT:
19479 case GE:
19480 case UNORDERED:
19481 case ORDERED:
19482 case UNEQ:
19483 arith_cost = 4;
19484 break;
19485 case LT:
19486 case NE:
19487 case EQ:
19488 case UNGE:
19489 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19490 break;
19491 case LE:
19492 case UNGT:
19493 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19494 break;
19495 default:
19496 gcc_unreachable ();
19499 switch (ix86_fp_comparison_strategy (code))
19501 case IX86_FPCMP_COMI:
19502 return arith_cost > 4 ? 3 : 2;
19503 case IX86_FPCMP_SAHF:
19504 return arith_cost > 4 ? 4 : 3;
19505 default:
19506 return arith_cost;
19510 /* Return the strategy to use for floating-point comparisons. We assume that fcomi is always
19511 preferable where available, since that is also true when looking at size
19512 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19514 enum ix86_fpcmp_strategy
19515 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19517 /* Do fcomi/sahf based test when profitable. */
19519 if (TARGET_CMOVE)
19520 return IX86_FPCMP_COMI;
19522 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19523 return IX86_FPCMP_SAHF;
19525 return IX86_FPCMP_ARITH;
19528 /* Swap, force into registers, or otherwise massage the two operands
19529 to a fp comparison. The operands are updated in place; the new
19530 comparison code is returned. */
19532 static enum rtx_code
19533 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19535 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19536 rtx op0 = *pop0, op1 = *pop1;
19537 enum machine_mode op_mode = GET_MODE (op0);
19538 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19540 /* All of the unordered compare instructions only work on registers.
19541 The same is true of the fcomi compare instructions. The XFmode
19542 compare instructions require registers except when comparing
19543 against zero or when converting operand 1 from fixed point to
19544 floating point. */
19546 if (!is_sse
19547 && (fpcmp_mode == CCFPUmode
19548 || (op_mode == XFmode
19549 && ! (standard_80387_constant_p (op0) == 1
19550 || standard_80387_constant_p (op1) == 1)
19551 && GET_CODE (op1) != FLOAT)
19552 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19554 op0 = force_reg (op_mode, op0);
19555 op1 = force_reg (op_mode, op1);
19557 else
19559 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19560 things around if they appear profitable, otherwise force op0
19561 into a register. */
19563 if (standard_80387_constant_p (op0) == 0
19564 || (MEM_P (op0)
19565 && ! (standard_80387_constant_p (op1) == 0
19566 || MEM_P (op1))))
19568 enum rtx_code new_code = ix86_fp_swap_condition (code);
19569 if (new_code != UNKNOWN)
19571 rtx tmp;
19572 tmp = op0, op0 = op1, op1 = tmp;
19573 code = new_code;
19577 if (!REG_P (op0))
19578 op0 = force_reg (op_mode, op0);
19580 if (CONSTANT_P (op1))
19582 int tmp = standard_80387_constant_p (op1);
19583 if (tmp == 0)
19584 op1 = validize_mem (force_const_mem (op_mode, op1));
19585 else if (tmp == 1)
19587 if (TARGET_CMOVE)
19588 op1 = force_reg (op_mode, op1);
19590 else
19591 op1 = force_reg (op_mode, op1);
19595 /* Try to rearrange the comparison to make it cheaper. */
19596 if (ix86_fp_comparison_cost (code)
19597 > ix86_fp_comparison_cost (swap_condition (code))
19598 && (REG_P (op1) || can_create_pseudo_p ()))
19600 rtx tmp;
19601 tmp = op0, op0 = op1, op1 = tmp;
19602 code = swap_condition (code);
19603 if (!REG_P (op0))
19604 op0 = force_reg (op_mode, op0);
19607 *pop0 = op0;
19608 *pop1 = op1;
19609 return code;
19612 /* Convert the comparison codes we use to represent FP comparisons to the integer
19613 code that will result in a proper branch. Return UNKNOWN if no such code
19614 is available. */
19616 enum rtx_code
19617 ix86_fp_compare_code_to_integer (enum rtx_code code)
19619 switch (code)
19621 case GT:
19622 return GTU;
19623 case GE:
19624 return GEU;
19625 case ORDERED:
19626 case UNORDERED:
19627 return code;
19628 break;
19629 case UNEQ:
19630 return EQ;
19631 break;
19632 case UNLT:
19633 return LTU;
19634 break;
19635 case UNLE:
19636 return LEU;
19637 break;
19638 case LTGT:
19639 return NE;
19640 break;
19641 default:
19642 return UNKNOWN;
19646 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19648 static rtx
19649 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19651 enum machine_mode fpcmp_mode, intcmp_mode;
19652 rtx tmp, tmp2;
19654 fpcmp_mode = ix86_fp_compare_mode (code);
19655 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19657 /* Do fcomi/sahf based test when profitable. */
19658 switch (ix86_fp_comparison_strategy (code))
19660 case IX86_FPCMP_COMI:
19661 intcmp_mode = fpcmp_mode;
19662 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19663 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19664 tmp);
19665 emit_insn (tmp);
19666 break;
19668 case IX86_FPCMP_SAHF:
19669 intcmp_mode = fpcmp_mode;
19670 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19671 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19672 tmp);
19674 if (!scratch)
19675 scratch = gen_reg_rtx (HImode);
19676 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19677 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19678 break;
19680 case IX86_FPCMP_ARITH:
19681 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19682 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19683 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19684 if (!scratch)
19685 scratch = gen_reg_rtx (HImode);
19686 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19688 /* In the unordered case, we have to check C2 for NaNs, which
19689 doesn't happen to work out to anything nice combination-wise.
19690 So do some bit twiddling on the value we've got in AH to come
19691 up with an appropriate set of condition codes. */
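/* Reference note, added for clarity: fnstsw stores the FPU status word in
   %ax, so %ah holds C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6
   (0x40); the constants 0x45, 0x44, 0x05, 0x04 and 0x40 used below select
   combinations of those condition flags.  */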
19693 intcmp_mode = CCNOmode;
19694 switch (code)
19696 case GT:
19697 case UNGT:
19698 if (code == GT || !TARGET_IEEE_FP)
19700 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19701 code = EQ;
19703 else
19705 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19706 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19707 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19708 intcmp_mode = CCmode;
19709 code = GEU;
19711 break;
19712 case LT:
19713 case UNLT:
19714 if (code == LT && TARGET_IEEE_FP)
19716 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19717 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19718 intcmp_mode = CCmode;
19719 code = EQ;
19721 else
19723 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19724 code = NE;
19726 break;
19727 case GE:
19728 case UNGE:
19729 if (code == GE || !TARGET_IEEE_FP)
19731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19732 code = EQ;
19734 else
19736 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19737 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19738 code = NE;
19740 break;
19741 case LE:
19742 case UNLE:
19743 if (code == LE && TARGET_IEEE_FP)
19745 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19746 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19747 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19748 intcmp_mode = CCmode;
19749 code = LTU;
19751 else
19753 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19754 code = NE;
19756 break;
19757 case EQ:
19758 case UNEQ:
19759 if (code == EQ && TARGET_IEEE_FP)
19761 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19762 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19763 intcmp_mode = CCmode;
19764 code = EQ;
19766 else
19768 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19769 code = NE;
19771 break;
19772 case NE:
19773 case LTGT:
19774 if (code == NE && TARGET_IEEE_FP)
19776 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19777 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19778 GEN_INT (0x40)));
19779 code = NE;
19781 else
19783 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19784 code = EQ;
19786 break;
19788 case UNORDERED:
19789 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19790 code = NE;
19791 break;
19792 case ORDERED:
19793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19794 code = EQ;
19795 break;
19797 default:
19798 gcc_unreachable ();
19800 break;
19802 default:
19803 gcc_unreachable();
19806 /* Return the test that should be put into the flags user, i.e.
19807 the bcc, scc, or cmov instruction. */
19808 return gen_rtx_fmt_ee (code, VOIDmode,
19809 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19810 const0_rtx);
19813 static rtx
19814 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19816 rtx ret;
19818 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19819 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19821 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19823 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19824 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19826 else
19827 ret = ix86_expand_int_compare (code, op0, op1);
19829 return ret;
19832 void
19833 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19835 enum machine_mode mode = GET_MODE (op0);
19836 rtx tmp;
19838 switch (mode)
19840 case SFmode:
19841 case DFmode:
19842 case XFmode:
19843 case QImode:
19844 case HImode:
19845 case SImode:
19846 simple:
19847 tmp = ix86_expand_compare (code, op0, op1);
19848 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19849 gen_rtx_LABEL_REF (VOIDmode, label),
19850 pc_rtx);
19851 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19852 return;
19854 case DImode:
19855 if (TARGET_64BIT)
19856 goto simple;
19857 case TImode:
19858 /* Expand DImode branch into multiple compare+branch. */
19860 rtx lo[2], hi[2], label2;
19861 enum rtx_code code1, code2, code3;
19862 enum machine_mode submode;
19864 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19866 tmp = op0, op0 = op1, op1 = tmp;
19867 code = swap_condition (code);
19870 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19871 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19873 submode = mode == DImode ? SImode : DImode;
19875 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19876 avoid two branches. This costs one extra insn, so disable when
19877 optimizing for size. */
19879 if ((code == EQ || code == NE)
19880 && (!optimize_insn_for_size_p ()
19881 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19883 rtx xor0, xor1;
19885 xor1 = hi[0];
19886 if (hi[1] != const0_rtx)
19887 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19888 NULL_RTX, 0, OPTAB_WIDEN);
19890 xor0 = lo[0];
19891 if (lo[1] != const0_rtx)
19892 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19893 NULL_RTX, 0, OPTAB_WIDEN);
19895 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19896 NULL_RTX, 0, OPTAB_WIDEN);
19898 ix86_expand_branch (code, tmp, const0_rtx, label);
19899 return;
19902 /* Otherwise, if we are doing a less-than or greater-or-equal-than comparison,
19903 op1 is a constant, and its low word is zero, then we can just
19904 examine the high word. Similarly for a low word of -1 and
19905 less-or-equal-than or greater-than. */
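/* Example, added for clarity: for an unsigned 64-bit compare a < b on a
   32-bit target with b = 0x500000000 (low word zero), a < b holds exactly
   when hi(a) < 5, so a single compare of the high words suffices.  */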
19907 if (CONST_INT_P (hi[1]))
19908 switch (code)
19910 case LT: case LTU: case GE: case GEU:
19911 if (lo[1] == const0_rtx)
19913 ix86_expand_branch (code, hi[0], hi[1], label);
19914 return;
19916 break;
19917 case LE: case LEU: case GT: case GTU:
19918 if (lo[1] == constm1_rtx)
19920 ix86_expand_branch (code, hi[0], hi[1], label);
19921 return;
19923 break;
19924 default:
19925 break;
19928 /* Otherwise, we need two or three jumps. */
19930 label2 = gen_label_rtx ();
19932 code1 = code;
19933 code2 = swap_condition (code);
19934 code3 = unsigned_condition (code);
19936 switch (code)
19938 case LT: case GT: case LTU: case GTU:
19939 break;
19941 case LE: code1 = LT; code2 = GT; break;
19942 case GE: code1 = GT; code2 = LT; break;
19943 case LEU: code1 = LTU; code2 = GTU; break;
19944 case GEU: code1 = GTU; code2 = LTU; break;
19946 case EQ: code1 = UNKNOWN; code2 = NE; break;
19947 case NE: code2 = UNKNOWN; break;
19949 default:
19950 gcc_unreachable ();
19954 * a < b =>
19955 * if (hi(a) < hi(b)) goto true;
19956 * if (hi(a) > hi(b)) goto false;
19957 * if (lo(a) < lo(b)) goto true;
19958 * false:
19961 if (code1 != UNKNOWN)
19962 ix86_expand_branch (code1, hi[0], hi[1], label);
19963 if (code2 != UNKNOWN)
19964 ix86_expand_branch (code2, hi[0], hi[1], label2);
19966 ix86_expand_branch (code3, lo[0], lo[1], label);
19968 if (code2 != UNKNOWN)
19969 emit_label (label2);
19970 return;
19973 default:
19974 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19975 goto simple;
19979 /* Split branch based on floating point condition. */
19980 void
19981 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19982 rtx target1, rtx target2, rtx tmp, rtx pushed)
19984 rtx condition;
19985 rtx i;
19987 if (target2 != pc_rtx)
19989 rtx tmp = target2;
19990 code = reverse_condition_maybe_unordered (code);
19991 target2 = target1;
19992 target1 = tmp;
19995 condition = ix86_expand_fp_compare (code, op1, op2,
19996 tmp);
19998 /* Remove pushed operand from stack. */
19999 if (pushed)
20000 ix86_free_from_memory (GET_MODE (pushed));
20002 i = emit_jump_insn (gen_rtx_SET
20003 (VOIDmode, pc_rtx,
20004 gen_rtx_IF_THEN_ELSE (VOIDmode,
20005 condition, target1, target2)));
20006 if (split_branch_probability >= 0)
20007 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20010 void
20011 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20013 rtx ret;
20015 gcc_assert (GET_MODE (dest) == QImode);
20017 ret = ix86_expand_compare (code, op0, op1);
20018 PUT_MODE (ret, QImode);
20019 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20022 /* Expand comparison setting or clearing carry flag. Return true when
20023 successful and set pop for the operation. */
20024 static bool
20025 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20027 enum machine_mode mode =
20028 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20030 /* Do not handle double-mode compares, which go through a special path. */
20031 if (mode == (TARGET_64BIT ? TImode : DImode))
20032 return false;
20034 if (SCALAR_FLOAT_MODE_P (mode))
20036 rtx compare_op, compare_seq;
20038 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20040 /* Shortcut: the following common codes never translate
20041 into carry flag compares. */
20042 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20043 || code == ORDERED || code == UNORDERED)
20044 return false;
20046 /* These comparisons require the zero flag; swap the operands so they won't. */
20047 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20048 && !TARGET_IEEE_FP)
20050 rtx tmp = op0;
20051 op0 = op1;
20052 op1 = tmp;
20053 code = swap_condition (code);
20056 /* Try to expand the comparison and verify that we end up with a
20057 carry flag based comparison. This fails to be true only when
20058 we decide to expand the comparison using arithmetic, which is not
20059 a common scenario. */
20060 start_sequence ();
20061 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20062 compare_seq = get_insns ();
20063 end_sequence ();
20065 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20066 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20067 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20068 else
20069 code = GET_CODE (compare_op);
20071 if (code != LTU && code != GEU)
20072 return false;
20074 emit_insn (compare_seq);
20075 *pop = compare_op;
20076 return true;
20079 if (!INTEGRAL_MODE_P (mode))
20080 return false;
20082 switch (code)
20084 case LTU:
20085 case GEU:
20086 break;
20088 /* Convert a==0 into (unsigned)a<1. */
20089 case EQ:
20090 case NE:
20091 if (op1 != const0_rtx)
20092 return false;
20093 op1 = const1_rtx;
20094 code = (code == EQ ? LTU : GEU);
20095 break;
20097 /* Convert a>b into b<a or a>=b-1. */
20098 case GTU:
20099 case LEU:
20100 if (CONST_INT_P (op1))
20102 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20103 /* Bail out on overflow. We can still swap the operands, but that
20104 would force loading the constant into a register. */
20105 if (op1 == const0_rtx
20106 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20107 return false;
20108 code = (code == GTU ? GEU : LTU);
20110 else
20112 rtx tmp = op1;
20113 op1 = op0;
20114 op0 = tmp;
20115 code = (code == GTU ? LTU : GEU);
20117 break;
20119 /* Convert a>=0 into (unsigned)a<0x80000000. */
20120 case LT:
20121 case GE:
20122 if (mode == DImode || op1 != const0_rtx)
20123 return false;
20124 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20125 code = (code == LT ? GEU : LTU);
20126 break;
20127 case LE:
20128 case GT:
20129 if (mode == DImode || op1 != constm1_rtx)
20130 return false;
20131 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20132 code = (code == LE ? GEU : LTU);
20133 break;
20135 default:
20136 return false;
20138 /* Swapping operands may cause a constant to appear as the first operand. */
20139 if (!nonimmediate_operand (op0, VOIDmode))
20141 if (!can_create_pseudo_p ())
20142 return false;
20143 op0 = force_reg (mode, op0);
20145 *pop = ix86_expand_compare (code, op0, op1);
20146 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20147 return true;
20150 bool
20151 ix86_expand_int_movcc (rtx operands[])
20153 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20154 rtx compare_seq, compare_op;
20155 enum machine_mode mode = GET_MODE (operands[0]);
20156 bool sign_bit_compare_p = false;
20157 rtx op0 = XEXP (operands[1], 0);
20158 rtx op1 = XEXP (operands[1], 1);
20160 if (GET_MODE (op0) == TImode
20161 || (GET_MODE (op0) == DImode
20162 && !TARGET_64BIT))
20163 return false;
20165 start_sequence ();
20166 compare_op = ix86_expand_compare (code, op0, op1);
20167 compare_seq = get_insns ();
20168 end_sequence ();
20170 compare_code = GET_CODE (compare_op);
20172 if ((op1 == const0_rtx && (code == GE || code == LT))
20173 || (op1 == constm1_rtx && (code == GT || code == LE)))
20174 sign_bit_compare_p = true;
20176 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20177 HImode insns, we'd be swallowed in word prefix ops. */
20179 if ((mode != HImode || TARGET_FAST_PREFIX)
20180 && (mode != (TARGET_64BIT ? TImode : DImode))
20181 && CONST_INT_P (operands[2])
20182 && CONST_INT_P (operands[3]))
20184 rtx out = operands[0];
20185 HOST_WIDE_INT ct = INTVAL (operands[2]);
20186 HOST_WIDE_INT cf = INTVAL (operands[3]);
20187 HOST_WIDE_INT diff;
20189 diff = ct - cf;
20190 /* Sign bit compares are better done using shifts than by using
20191 sbb. */
20192 if (sign_bit_compare_p
20193 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20195 /* Detect overlap between destination and compare sources. */
20196 rtx tmp = out;
20198 if (!sign_bit_compare_p)
20200 rtx flags;
20201 bool fpcmp = false;
20203 compare_code = GET_CODE (compare_op);
20205 flags = XEXP (compare_op, 0);
20207 if (GET_MODE (flags) == CCFPmode
20208 || GET_MODE (flags) == CCFPUmode)
20210 fpcmp = true;
20211 compare_code
20212 = ix86_fp_compare_code_to_integer (compare_code);
20215 /* To simplify rest of code, restrict to the GEU case. */
20216 if (compare_code == LTU)
20218 HOST_WIDE_INT tmp = ct;
20219 ct = cf;
20220 cf = tmp;
20221 compare_code = reverse_condition (compare_code);
20222 code = reverse_condition (code);
20224 else
20226 if (fpcmp)
20227 PUT_CODE (compare_op,
20228 reverse_condition_maybe_unordered
20229 (GET_CODE (compare_op)));
20230 else
20231 PUT_CODE (compare_op,
20232 reverse_condition (GET_CODE (compare_op)));
20234 diff = ct - cf;
20236 if (reg_overlap_mentioned_p (out, op0)
20237 || reg_overlap_mentioned_p (out, op1))
20238 tmp = gen_reg_rtx (mode);
20240 if (mode == DImode)
20241 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20242 else
20243 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20244 flags, compare_op));
20246 else
20248 if (code == GT || code == GE)
20249 code = reverse_condition (code);
20250 else
20252 HOST_WIDE_INT tmp = ct;
20253 ct = cf;
20254 cf = tmp;
20255 diff = ct - cf;
20257 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
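/* Note, added for clarity: at this point TMP holds either 0 or -1 (all bits
   set) depending on the comparison -- produced either by the sbb reg,reg
   idiom or by emit_store_flag with -1 -- and the arithmetic below (add, or,
   not, and) turns that mask into the constants CT and CF.  */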
20260 if (diff == 1)
20263 * cmpl op0,op1
20264 * sbbl dest,dest
20265 * [addl dest, ct]
20267 * Size 5 - 8.
20269 if (ct)
20270 tmp = expand_simple_binop (mode, PLUS,
20271 tmp, GEN_INT (ct),
20272 copy_rtx (tmp), 1, OPTAB_DIRECT);
20274 else if (cf == -1)
20277 * cmpl op0,op1
20278 * sbbl dest,dest
20279 * orl $ct, dest
20281 * Size 8.
20283 tmp = expand_simple_binop (mode, IOR,
20284 tmp, GEN_INT (ct),
20285 copy_rtx (tmp), 1, OPTAB_DIRECT);
20287 else if (diff == -1 && ct)
20290 * cmpl op0,op1
20291 * sbbl dest,dest
20292 * notl dest
20293 * [addl dest, cf]
20295 * Size 8 - 11.
20297 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20298 if (cf)
20299 tmp = expand_simple_binop (mode, PLUS,
20300 copy_rtx (tmp), GEN_INT (cf),
20301 copy_rtx (tmp), 1, OPTAB_DIRECT);
20303 else
20306 * cmpl op0,op1
20307 * sbbl dest,dest
20308 * [notl dest]
20309 * andl cf - ct, dest
20310 * [addl dest, ct]
20312 * Size 8 - 11.
20315 if (cf == 0)
20317 cf = ct;
20318 ct = 0;
20319 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20322 tmp = expand_simple_binop (mode, AND,
20323 copy_rtx (tmp),
20324 gen_int_mode (cf - ct, mode),
20325 copy_rtx (tmp), 1, OPTAB_DIRECT);
20326 if (ct)
20327 tmp = expand_simple_binop (mode, PLUS,
20328 copy_rtx (tmp), GEN_INT (ct),
20329 copy_rtx (tmp), 1, OPTAB_DIRECT);
20332 if (!rtx_equal_p (tmp, out))
20333 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20335 return true;
20338 if (diff < 0)
20340 enum machine_mode cmp_mode = GET_MODE (op0);
20342 HOST_WIDE_INT tmp;
20343 tmp = ct, ct = cf, cf = tmp;
20344 diff = -diff;
20346 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20348 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20350 /* We may be reversing an unordered compare to a normal compare, which
20351 is not valid in general (we may convert a non-trapping condition
20352 into a trapping one); however, on i386 we currently emit all
20353 comparisons unordered. */
20354 compare_code = reverse_condition_maybe_unordered (compare_code);
20355 code = reverse_condition_maybe_unordered (code);
20357 else
20359 compare_code = reverse_condition (compare_code);
20360 code = reverse_condition (code);
20364 compare_code = UNKNOWN;
20365 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20366 && CONST_INT_P (op1))
20368 if (op1 == const0_rtx
20369 && (code == LT || code == GE))
20370 compare_code = code;
20371 else if (op1 == constm1_rtx)
20373 if (code == LE)
20374 compare_code = LT;
20375 else if (code == GT)
20376 compare_code = GE;
20380 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20381 if (compare_code != UNKNOWN
20382 && GET_MODE (op0) == GET_MODE (out)
20383 && (cf == -1 || ct == -1))
20385 /* If lea code below could be used, only optimize
20386 if it results in a 2 insn sequence. */
20388 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20389 || diff == 3 || diff == 5 || diff == 9)
20390 || (compare_code == LT && ct == -1)
20391 || (compare_code == GE && cf == -1))
20394 * notl op1 (if necessary)
20395 * sarl $31, op1
20396 * orl cf, op1
20398 if (ct != -1)
20400 cf = ct;
20401 ct = -1;
20402 code = reverse_condition (code);
20405 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20407 out = expand_simple_binop (mode, IOR,
20408 out, GEN_INT (cf),
20409 out, 1, OPTAB_DIRECT);
20410 if (out != operands[0])
20411 emit_move_insn (operands[0], out);
20413 return true;
20418 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20419 || diff == 3 || diff == 5 || diff == 9)
20420 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20421 && (mode != DImode
20422 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20425 * xorl dest,dest
20426 * cmpl op1,op2
20427 * setcc dest
20428 * lea cf(dest*(ct-cf)),dest
20430 * Size 14.
20432 * This also catches the degenerate setcc-only case.
20435 rtx tmp;
20436 int nops;
20438 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20440 nops = 0;
20441 /* On x86_64 the lea instruction operates on Pmode, so we need
20442 to get the arithmetic done in the proper mode to match. */
20443 if (diff == 1)
20444 tmp = copy_rtx (out);
20445 else
20447 rtx out1;
20448 out1 = copy_rtx (out);
20449 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20450 nops++;
20451 if (diff & 1)
20453 tmp = gen_rtx_PLUS (mode, tmp, out1);
20454 nops++;
20457 if (cf != 0)
20459 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20460 nops++;
20462 if (!rtx_equal_p (tmp, out))
20464 if (nops == 1)
20465 out = force_operand (tmp, copy_rtx (out));
20466 else
20467 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20469 if (!rtx_equal_p (out, operands[0]))
20470 emit_move_insn (operands[0], copy_rtx (out));
20472 return true;
20476 * General case: Jumpful:
20477 * xorl dest,dest cmpl op1, op2
20478 * cmpl op1, op2 movl ct, dest
20479 * setcc dest jcc 1f
20480 * decl dest movl cf, dest
20481 * andl (cf-ct),dest 1:
20482 * addl ct,dest
20484 * Size 20. Size 14.
20486 * This is reasonably steep, but branch mispredict costs are
20487 * high on modern cpus, so consider failing only if optimizing
20488 * for space.
20491 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20492 && BRANCH_COST (optimize_insn_for_speed_p (),
20493 false) >= 2)
20495 if (cf == 0)
20497 enum machine_mode cmp_mode = GET_MODE (op0);
20499 cf = ct;
20500 ct = 0;
20502 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20504 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20506 /* We may be reversing an unordered compare to a normal compare,
20507 which is not valid in general (we may convert a non-trapping
20508 condition into a trapping one); however, on i386 we currently
20509 emit all comparisons unordered. */
20510 code = reverse_condition_maybe_unordered (code);
20512 else
20514 code = reverse_condition (code);
20515 if (compare_code != UNKNOWN)
20516 compare_code = reverse_condition (compare_code);
20520 if (compare_code != UNKNOWN)
20522 /* notl op1 (if needed)
20523 sarl $31, op1
20524 andl (cf-ct), op1
20525 addl ct, op1
20527 For x < 0 (resp. x <= -1) there will be no notl,
20528 so if possible swap the constants to get rid of the
20529 complement.
20530 True/false will be -1/0 while code below (store flag
20531 followed by decrement) is 0/-1, so the constants need
20532 to be exchanged once more. */
20534 if (compare_code == GE || !cf)
20536 code = reverse_condition (code);
20537 compare_code = LT;
20539 else
20541 HOST_WIDE_INT tmp = cf;
20542 cf = ct;
20543 ct = tmp;
20546 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20548 else
20550 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20552 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20553 constm1_rtx,
20554 copy_rtx (out), 1, OPTAB_DIRECT);
20557 out = expand_simple_binop (mode, AND, copy_rtx (out),
20558 gen_int_mode (cf - ct, mode),
20559 copy_rtx (out), 1, OPTAB_DIRECT);
20560 if (ct)
20561 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20562 copy_rtx (out), 1, OPTAB_DIRECT);
20563 if (!rtx_equal_p (out, operands[0]))
20564 emit_move_insn (operands[0], copy_rtx (out));
20566 return true;
20570 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20572 /* Try a few more things with specific constants and a variable. */
20574 optab op;
20575 rtx var, orig_out, out, tmp;
20577 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20578 return false;
20580 /* If one of the two operands is an interesting constant, load a
20581 constant with the above and mask it in with a logical operation. */
20583 if (CONST_INT_P (operands[2]))
20585 var = operands[3];
20586 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20587 operands[3] = constm1_rtx, op = and_optab;
20588 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20589 operands[3] = const0_rtx, op = ior_optab;
20590 else
20591 return false;
20593 else if (CONST_INT_P (operands[3]))
20595 var = operands[2];
20596 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20597 operands[2] = constm1_rtx, op = and_optab;
20598 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20599 operands[2] = const0_rtx, op = ior_optab;
20600 else
20601 return false;
20603 else
20604 return false;
20606 orig_out = operands[0];
20607 tmp = gen_reg_rtx (mode);
20608 operands[0] = tmp;
20610 /* Recurse to get the constant loaded. */
20611 if (ix86_expand_int_movcc (operands) == 0)
20612 return false;
20614 /* Mask in the interesting variable. */
20615 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20616 OPTAB_WIDEN);
20617 if (!rtx_equal_p (out, orig_out))
20618 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20620 return true;
20624 * For comparison with above,
20626 * movl cf,dest
20627 * movl ct,tmp
20628 * cmpl op1,op2
20629 * cmovcc tmp,dest
20631 * Size 15.
20634 if (! nonimmediate_operand (operands[2], mode))
20635 operands[2] = force_reg (mode, operands[2]);
20636 if (! nonimmediate_operand (operands[3], mode))
20637 operands[3] = force_reg (mode, operands[3]);
20639 if (! register_operand (operands[2], VOIDmode)
20640 && (mode == QImode
20641 || ! register_operand (operands[3], VOIDmode)))
20642 operands[2] = force_reg (mode, operands[2]);
20644 if (mode == QImode
20645 && ! register_operand (operands[3], VOIDmode))
20646 operands[3] = force_reg (mode, operands[3]);
20648 emit_insn (compare_seq);
20649 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20650 gen_rtx_IF_THEN_ELSE (mode,
20651 compare_op, operands[2],
20652 operands[3])));
20653 return true;
20656 /* Swap, force into registers, or otherwise massage the two operands
20657 to an sse comparison with a mask result. Thus we differ a bit from
20658 ix86_prepare_fp_compare_args which expects to produce a flags result.
20660 The DEST operand exists to help determine whether to commute commutative
20661 operators. The POP0/POP1 operands are updated in place. The new
20662 comparison code is returned, or UNKNOWN if not implementable. */
20664 static enum rtx_code
20665 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20666 rtx *pop0, rtx *pop1)
20668 rtx tmp;
20670 switch (code)
20672 case LTGT:
20673 case UNEQ:
20674 /* AVX supports all the needed comparisons. */
20675 if (TARGET_AVX)
20676 break;
20677 /* We have no LTGT as an operator. We could implement it with
20678 NE & ORDERED, but this requires an extra temporary. It's
20679 not clear that it's worth it. */
20680 return UNKNOWN;
20682 case LT:
20683 case LE:
20684 case UNGT:
20685 case UNGE:
20686 /* These are supported directly. */
20687 break;
20689 case EQ:
20690 case NE:
20691 case UNORDERED:
20692 case ORDERED:
20693 /* AVX has 3 operand comparisons, no need to swap anything. */
20694 if (TARGET_AVX)
20695 break;
20696 /* For commutative operators, try to canonicalize the destination
20697 operand to be first in the comparison - this helps reload to
20698 avoid extra moves. */
20699 if (!dest || !rtx_equal_p (dest, *pop1))
20700 break;
20701 /* FALLTHRU */
20703 case GE:
20704 case GT:
20705 case UNLE:
20706 case UNLT:
20707 /* These are not supported directly before AVX, and furthermore
20708 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20709 comparison operands to transform into something that is
20710 supported. */
20711 tmp = *pop0;
20712 *pop0 = *pop1;
20713 *pop1 = tmp;
20714 code = swap_condition (code);
20715 break;
20717 default:
20718 gcc_unreachable ();
20721 return code;
20724 /* Detect conditional moves that exactly match min/max operational
20725 semantics. Note that this is IEEE safe, as long as we don't
20726 interchange the operands.
20728 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20729 and TRUE if the operation is successful and instructions are emitted. */
20731 static bool
20732 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20733 rtx cmp_op1, rtx if_true, rtx if_false)
20735 enum machine_mode mode;
20736 bool is_min;
20737 rtx tmp;
20739 if (code == LT)
20741 else if (code == UNGE)
20743 tmp = if_true;
20744 if_true = if_false;
20745 if_false = tmp;
20747 else
20748 return false;
20750 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20751 is_min = true;
20752 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20753 is_min = false;
20754 else
20755 return false;
20757 mode = GET_MODE (dest);
20759 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20760 but MODE may be a vector mode and thus not appropriate. */
20761 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20763 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20764 rtvec v;
20766 if_true = force_reg (mode, if_true);
20767 v = gen_rtvec (2, if_true, if_false);
20768 tmp = gen_rtx_UNSPEC (mode, v, u);
20770 else
20772 code = is_min ? SMIN : SMAX;
20773 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20776 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20777 return true;
20780 /* Expand an sse vector comparison. Return the register with the result. */
20782 static rtx
20783 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20784 rtx op_true, rtx op_false)
20786 enum machine_mode mode = GET_MODE (dest);
20787 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20789 /* In the general case the result of a comparison can differ from the operands' type. */
20790 enum machine_mode cmp_mode;
20792 /* In AVX512F the result of comparison is an integer mask. */
20793 bool maskcmp = false;
20794 rtx x;
20796 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20798 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20799 gcc_assert (cmp_mode != BLKmode);
20801 maskcmp = true;
20803 else
20804 cmp_mode = cmp_ops_mode;
20807 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20808 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20809 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20811 if (optimize
20812 || reg_overlap_mentioned_p (dest, op_true)
20813 || reg_overlap_mentioned_p (dest, op_false))
20814 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20816 /* Compare patterns for int modes are unspec in AVX512F only. */
20817 if (maskcmp && (code == GT || code == EQ))
20819 rtx (*gen)(rtx, rtx, rtx);
20821 switch (cmp_ops_mode)
20823 case V16SImode:
20824 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20825 break;
20826 case V8DImode:
20827 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20828 break;
20829 default:
20830 gen = NULL;
20833 if (gen)
20835 emit_insn (gen (dest, cmp_op0, cmp_op1));
20836 return dest;
20839 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20841 if (cmp_mode != mode && !maskcmp)
20843 x = force_reg (cmp_ops_mode, x);
20844 convert_move (dest, x, false);
20846 else
20847 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20849 return dest;
20852 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20853 operations. This is used for both scalar and vector conditional moves. */
20855 static void
20856 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20858 enum machine_mode mode = GET_MODE (dest);
20859 enum machine_mode cmpmode = GET_MODE (cmp);
20861 /* In AVX512F the result of comparison is an integer mask. */
20862 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20864 rtx t2, t3, x;
20866 if (vector_all_ones_operand (op_true, mode)
20867 && rtx_equal_p (op_false, CONST0_RTX (mode))
20868 && !maskcmp)
20870 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20872 else if (op_false == CONST0_RTX (mode)
20873 && !maskcmp)
20875 op_true = force_reg (mode, op_true);
20876 x = gen_rtx_AND (mode, cmp, op_true);
20877 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20879 else if (op_true == CONST0_RTX (mode)
20880 && !maskcmp)
20882 op_false = force_reg (mode, op_false);
20883 x = gen_rtx_NOT (mode, cmp);
20884 x = gen_rtx_AND (mode, x, op_false);
20885 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20887 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20888 && !maskcmp)
20890 op_false = force_reg (mode, op_false);
20891 x = gen_rtx_IOR (mode, cmp, op_false);
20892 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20894 else if (TARGET_XOP
20895 && !maskcmp)
20897 op_true = force_reg (mode, op_true);
20899 if (!nonimmediate_operand (op_false, mode))
20900 op_false = force_reg (mode, op_false);
20902 emit_insn (gen_rtx_SET (mode, dest,
20903 gen_rtx_IF_THEN_ELSE (mode, cmp,
20904 op_true,
20905 op_false)));
20907 else
20909 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20910 rtx d = dest;
20912 if (!nonimmediate_operand (op_true, mode))
20913 op_true = force_reg (mode, op_true);
20915 op_false = force_reg (mode, op_false);
20917 switch (mode)
20919 case V4SFmode:
20920 if (TARGET_SSE4_1)
20921 gen = gen_sse4_1_blendvps;
20922 break;
20923 case V2DFmode:
20924 if (TARGET_SSE4_1)
20925 gen = gen_sse4_1_blendvpd;
20926 break;
20927 case V16QImode:
20928 case V8HImode:
20929 case V4SImode:
20930 case V2DImode:
20931 if (TARGET_SSE4_1)
20933 gen = gen_sse4_1_pblendvb;
20934 if (mode != V16QImode)
20935 d = gen_reg_rtx (V16QImode);
20936 op_false = gen_lowpart (V16QImode, op_false);
20937 op_true = gen_lowpart (V16QImode, op_true);
20938 cmp = gen_lowpart (V16QImode, cmp);
20940 break;
20941 case V8SFmode:
20942 if (TARGET_AVX)
20943 gen = gen_avx_blendvps256;
20944 break;
20945 case V4DFmode:
20946 if (TARGET_AVX)
20947 gen = gen_avx_blendvpd256;
20948 break;
20949 case V32QImode:
20950 case V16HImode:
20951 case V8SImode:
20952 case V4DImode:
20953 if (TARGET_AVX2)
20955 gen = gen_avx2_pblendvb;
20956 if (mode != V32QImode)
20957 d = gen_reg_rtx (V32QImode);
20958 op_false = gen_lowpart (V32QImode, op_false);
20959 op_true = gen_lowpart (V32QImode, op_true);
20960 cmp = gen_lowpart (V32QImode, cmp);
20962 break;
20964 case V16SImode:
20965 gen = gen_avx512f_blendmv16si;
20966 break;
20967 case V8DImode:
20968 gen = gen_avx512f_blendmv8di;
20969 break;
20970 case V8DFmode:
20971 gen = gen_avx512f_blendmv8df;
20972 break;
20973 case V16SFmode:
20974 gen = gen_avx512f_blendmv16sf;
20975 break;
20977 default:
20978 break;
20981 if (gen != NULL)
20983 emit_insn (gen (d, op_false, op_true, cmp));
20984 if (d != dest)
20985 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20987 else
20989 op_true = force_reg (mode, op_true);
20991 t2 = gen_reg_rtx (mode);
20992 if (optimize)
20993 t3 = gen_reg_rtx (mode);
20994 else
20995 t3 = dest;
20997 x = gen_rtx_AND (mode, op_true, cmp);
20998 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21000 x = gen_rtx_NOT (mode, cmp);
21001 x = gen_rtx_AND (mode, x, op_false);
21002 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21004 x = gen_rtx_IOR (mode, t3, t2);
21005 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21010 /* Expand a floating-point conditional move. Return true if successful. */
21012 bool
21013 ix86_expand_fp_movcc (rtx operands[])
21015 enum machine_mode mode = GET_MODE (operands[0]);
21016 enum rtx_code code = GET_CODE (operands[1]);
21017 rtx tmp, compare_op;
21018 rtx op0 = XEXP (operands[1], 0);
21019 rtx op1 = XEXP (operands[1], 1);
21021 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21023 enum machine_mode cmode;
21025 /* Since we've no cmove for sse registers, don't force bad register
21026 allocation just to gain access to it. Deny movcc when the
21027 comparison mode doesn't match the move mode. */
21028 cmode = GET_MODE (op0);
21029 if (cmode == VOIDmode)
21030 cmode = GET_MODE (op1);
21031 if (cmode != mode)
21032 return false;
21034 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21035 if (code == UNKNOWN)
21036 return false;
21038 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21039 operands[2], operands[3]))
21040 return true;
21042 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21043 operands[2], operands[3]);
21044 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21045 return true;
21048 if (GET_MODE (op0) == TImode
21049 || (GET_MODE (op0) == DImode
21050 && !TARGET_64BIT))
21051 return false;
21053 /* The floating point conditional move instructions don't directly
21054 support conditions resulting from a signed integer comparison. */
21056 compare_op = ix86_expand_compare (code, op0, op1);
21057 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21059 tmp = gen_reg_rtx (QImode);
21060 ix86_expand_setcc (tmp, code, op0, op1);
21062 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21065 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21066 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21067 operands[2], operands[3])));
21069 return true;
21072 /* Expand a floating-point vector conditional move; a vcond operation
21073 rather than a movcc operation. */
21075 bool
21076 ix86_expand_fp_vcond (rtx operands[])
21078 enum rtx_code code = GET_CODE (operands[3]);
21079 rtx cmp;
21081 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21082 &operands[4], &operands[5]);
21083 if (code == UNKNOWN)
21085 rtx temp;
21086 switch (GET_CODE (operands[3]))
21088 case LTGT:
21089 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21090 operands[5], operands[0], operands[0]);
21091 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21092 operands[5], operands[1], operands[2]);
21093 code = AND;
21094 break;
21095 case UNEQ:
21096 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21097 operands[5], operands[0], operands[0]);
21098 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21099 operands[5], operands[1], operands[2]);
21100 code = IOR;
21101 break;
21102 default:
21103 gcc_unreachable ();
21105 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21106 OPTAB_DIRECT);
21107 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21108 return true;
21111 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21112 operands[5], operands[1], operands[2]))
21113 return true;
21115 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21116 operands[1], operands[2]);
21117 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21118 return true;
21121 /* Expand a signed/unsigned integral vector conditional move. */
21123 bool
21124 ix86_expand_int_vcond (rtx operands[])
21126 enum machine_mode data_mode = GET_MODE (operands[0]);
21127 enum machine_mode mode = GET_MODE (operands[4]);
21128 enum rtx_code code = GET_CODE (operands[3]);
21129 bool negate = false;
21130 rtx x, cop0, cop1;
21132 cop0 = operands[4];
21133 cop1 = operands[5];
21135 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21136 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
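/* A scalar model of this transformation (illustration only; the names
   below are hypothetical and the functions are kept out of the build by
   the #if 0).  It assumes 32-bit int and an arithmetic right shift for
   negative values, as on x86.  */
#if 0
static int
lt_zero_to_all_ones (int x)
{
  return x >> 31;			/* x < 0 ? -1 : 0 */
}

static unsigned int
lt_zero_to_one (int x)
{
  return (unsigned int) x >> 31;	/* x < 0 ? 1 : 0 */
}
#endif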
21137 if ((code == LT || code == GE)
21138 && data_mode == mode
21139 && cop1 == CONST0_RTX (mode)
21140 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21141 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21142 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21143 && (GET_MODE_SIZE (data_mode) == 16
21144 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21146 rtx negop = operands[2 - (code == LT)];
21147 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21148 if (negop == CONST1_RTX (data_mode))
21150 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21151 operands[0], 1, OPTAB_DIRECT);
21152 if (res != operands[0])
21153 emit_move_insn (operands[0], res);
21154 return true;
21156 else if (GET_MODE_INNER (data_mode) != DImode
21157 && vector_all_ones_operand (negop, data_mode))
21159 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21160 operands[0], 0, OPTAB_DIRECT);
21161 if (res != operands[0])
21162 emit_move_insn (operands[0], res);
21163 return true;
21167 if (!nonimmediate_operand (cop1, mode))
21168 cop1 = force_reg (mode, cop1);
21169 if (!general_operand (operands[1], data_mode))
21170 operands[1] = force_reg (data_mode, operands[1]);
21171 if (!general_operand (operands[2], data_mode))
21172 operands[2] = force_reg (data_mode, operands[2]);
21174 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21175 if (TARGET_XOP
21176 && (mode == V16QImode || mode == V8HImode
21177 || mode == V4SImode || mode == V2DImode))
21179 else
21181 /* Canonicalize the comparison to EQ, GT, GTU. */
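/* For example (not in the original source): a LE comparison is rewritten
   as GT with negate set, i.e. a <= b becomes !(a > b); GE is reversed to
   LT and then falls through to the swap, so a >= b becomes !(b > a).  */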
21182 switch (code)
21184 case EQ:
21185 case GT:
21186 case GTU:
21187 break;
21189 case NE:
21190 case LE:
21191 case LEU:
21192 code = reverse_condition (code);
21193 negate = true;
21194 break;
21196 case GE:
21197 case GEU:
21198 code = reverse_condition (code);
21199 negate = true;
21200 /* FALLTHRU */
21202 case LT:
21203 case LTU:
21204 code = swap_condition (code);
21205 x = cop0, cop0 = cop1, cop1 = x;
21206 break;
21208 default:
21209 gcc_unreachable ();
21212 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21213 if (mode == V2DImode)
21215 switch (code)
21217 case EQ:
21218 /* SSE4.1 supports EQ. */
21219 if (!TARGET_SSE4_1)
21220 return false;
21221 break;
21223 case GT:
21224 case GTU:
21225 /* SSE4.2 supports GT/GTU. */
21226 if (!TARGET_SSE4_2)
21227 return false;
21228 break;
21230 default:
21231 gcc_unreachable ();
21235 /* Unsigned parallel compare is not supported by the hardware.
21236 Play some tricks to turn this into a signed comparison
21237 against 0. */
21238 if (code == GTU)
21240 cop0 = force_reg (mode, cop0);
21242 switch (mode)
21244 case V16SImode:
21245 case V8DImode:
21246 case V8SImode:
21247 case V4DImode:
21248 case V4SImode:
21249 case V2DImode:
21251 rtx t1, t2, mask;
21252 rtx (*gen_sub3) (rtx, rtx, rtx);
21254 switch (mode)
21256 case V16SImode: gen_sub3 = gen_subv16si3; break;
21257 case V8DImode: gen_sub3 = gen_subv8di3; break;
21258 case V8SImode: gen_sub3 = gen_subv8si3; break;
21259 case V4DImode: gen_sub3 = gen_subv4di3; break;
21260 case V4SImode: gen_sub3 = gen_subv4si3; break;
21261 case V2DImode: gen_sub3 = gen_subv2di3; break;
21262 default:
21263 gcc_unreachable ();
21265 /* Subtract (-(INT MAX) - 1) from both operands to make
21266 them signed. */
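/* A worked example (illustration only, 32-bit elements): for the unsigned
   comparison 0xffffffff >u 0x00000001, subtracting 0x80000000 from both
   sides gives 0x7fffffff (+2147483647) and 0x80000001 (-2147483647); the
   signed GT of the biased values is true, matching the unsigned result,
   because the bias flips the sign bit of every element.  */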
21267 mask = ix86_build_signbit_mask (mode, true, false);
21268 t1 = gen_reg_rtx (mode);
21269 emit_insn (gen_sub3 (t1, cop0, mask));
21271 t2 = gen_reg_rtx (mode);
21272 emit_insn (gen_sub3 (t2, cop1, mask));
21274 cop0 = t1;
21275 cop1 = t2;
21276 code = GT;
21278 break;
21280 case V32QImode:
21281 case V16HImode:
21282 case V16QImode:
21283 case V8HImode:
21284 /* Perform a parallel unsigned saturating subtraction. */
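/* Illustration (not in the original source): for unsigned elements,
   a >u b holds exactly when the saturating difference a -us b is
   nonzero.  E.g. 200 -us 100 = 100 (GTU true), while 50 -us 100
   saturates to 0 (GTU false).  Hence the EQ test against zero below,
   with negate flipped to undo the inversion.  */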
21285 x = gen_reg_rtx (mode);
21286 emit_insn (gen_rtx_SET (VOIDmode, x,
21287 gen_rtx_US_MINUS (mode, cop0, cop1)));
21289 cop0 = x;
21290 cop1 = CONST0_RTX (mode);
21291 code = EQ;
21292 negate = !negate;
21293 break;
21295 default:
21296 gcc_unreachable ();
21301 /* Allow the comparison to be done in one mode, but the movcc to
21302 happen in another mode. */
21303 if (data_mode == mode)
21305 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21306 operands[1+negate], operands[2-negate]);
21308 else
21310 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21311 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21312 operands[1+negate], operands[2-negate]);
21313 if (GET_MODE (x) == mode)
21314 x = gen_lowpart (data_mode, x);
21317 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21318 operands[2-negate]);
21319 return true;
21322 static bool
21323 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21325 enum machine_mode mode = GET_MODE (op0);
21326 switch (mode)
21328 case V16SImode:
21329 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21330 force_reg (V16SImode, mask),
21331 op1));
21332 return true;
21333 case V16SFmode:
21334 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21335 force_reg (V16SImode, mask),
21336 op1));
21337 return true;
21338 case V8DImode:
21339 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21340 force_reg (V8DImode, mask), op1));
21341 return true;
21342 case V8DFmode:
21343 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21344 force_reg (V8DImode, mask), op1));
21345 return true;
21346 default:
21347 return false;
21351 /* Expand a variable vector permutation. */
21353 void
21354 ix86_expand_vec_perm (rtx operands[])
21356 rtx target = operands[0];
21357 rtx op0 = operands[1];
21358 rtx op1 = operands[2];
21359 rtx mask = operands[3];
21360 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21361 enum machine_mode mode = GET_MODE (op0);
21362 enum machine_mode maskmode = GET_MODE (mask);
21363 int w, e, i;
21364 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21366 /* Number of elements in the vector. */
21367 w = GET_MODE_NUNITS (mode);
21368 e = GET_MODE_UNIT_SIZE (mode);
21369 gcc_assert (w <= 64);
21371 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21372 return;
21374 if (TARGET_AVX2)
21376 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21378 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21379 a constant shuffle operand. With a tiny bit of effort we can
21380 use VPERMD instead. A re-interpretation stall for V4DFmode is
21381 unfortunate but there's no avoiding it.
21382 Similarly for V16HImode we don't have instructions for variable
21383 shuffling, while for V32QImode we can, after preparing suitable
21384 masks, use vpshufb; vpshufb; vpermq; vpor. */
21386 if (mode == V16HImode)
21388 maskmode = mode = V32QImode;
21389 w = 32;
21390 e = 1;
21392 else
21394 maskmode = mode = V8SImode;
21395 w = 8;
21396 e = 4;
21398 t1 = gen_reg_rtx (maskmode);
21400 /* Replicate the low bits of the V4DImode mask into V8SImode:
21401 mask = { A B C D }
21402 t1 = { A A B B C C D D }. */
21403 for (i = 0; i < w / 2; ++i)
21404 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21405 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21406 vt = force_reg (maskmode, vt);
21407 mask = gen_lowpart (maskmode, mask);
21408 if (maskmode == V8SImode)
21409 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21410 else
21411 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21413 /* Multiply the shuffle indices by two. */
21414 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21415 OPTAB_DIRECT);
21417 /* Add one to the odd shuffle indices:
21418 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21419 for (i = 0; i < w / 2; ++i)
21421 vec[i * 2] = const0_rtx;
21422 vec[i * 2 + 1] = const1_rtx;
21424 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21425 vt = validize_mem (force_const_mem (maskmode, vt));
21426 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21427 OPTAB_DIRECT);
21429 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21430 operands[3] = mask = t1;
21431 target = gen_reg_rtx (mode);
21432 op0 = gen_lowpart (mode, op0);
21433 op1 = gen_lowpart (mode, op1);
21436 switch (mode)
21438 case V8SImode:
21439 /* The VPERMD and VPERMPS instructions already properly ignore
21440 the high bits of the shuffle elements. No need for us to
21441 perform an AND ourselves. */
21442 if (one_operand_shuffle)
21444 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21445 if (target != operands[0])
21446 emit_move_insn (operands[0],
21447 gen_lowpart (GET_MODE (operands[0]), target));
21449 else
21451 t1 = gen_reg_rtx (V8SImode);
21452 t2 = gen_reg_rtx (V8SImode);
21453 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21454 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21455 goto merge_two;
21457 return;
21459 case V8SFmode:
21460 mask = gen_lowpart (V8SFmode, mask);
21461 if (one_operand_shuffle)
21462 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21463 else
21465 t1 = gen_reg_rtx (V8SFmode);
21466 t2 = gen_reg_rtx (V8SFmode);
21467 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21468 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21469 goto merge_two;
21471 return;
21473 case V4SImode:
21474 /* By combining the two 128-bit input vectors into one 256-bit
21475 input vector, we can use VPERMD and VPERMPS for the full
21476 two-operand shuffle. */
21477 t1 = gen_reg_rtx (V8SImode);
21478 t2 = gen_reg_rtx (V8SImode);
21479 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21480 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21481 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21482 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21483 return;
21485 case V4SFmode:
21486 t1 = gen_reg_rtx (V8SFmode);
21487 t2 = gen_reg_rtx (V8SImode);
21488 mask = gen_lowpart (V4SImode, mask);
21489 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21490 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21491 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21492 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21493 return;
21495 case V32QImode:
21496 t1 = gen_reg_rtx (V32QImode);
21497 t2 = gen_reg_rtx (V32QImode);
21498 t3 = gen_reg_rtx (V32QImode);
21499 vt2 = GEN_INT (128);
21500 for (i = 0; i < 32; i++)
21501 vec[i] = vt2;
21502 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21503 vt = force_reg (V32QImode, vt);
21504 for (i = 0; i < 32; i++)
21505 vec[i] = i < 16 ? vt2 : const0_rtx;
21506 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21507 vt2 = force_reg (V32QImode, vt2);
21508 /* From mask create two adjusted masks, which contain the same
21509 bits as mask in the low 7 bits of each vector element.
21510 The first mask will have the most significant bit clear
21511 if it requests element from the same 128-bit lane
21512 and MSB set if it requests element from the other 128-bit lane.
21513 The second mask will have the opposite values of the MSB,
21514 and additionally will have its 128-bit lanes swapped.
21515 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21516 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21517 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21518 stands for other 12 bytes. */
21519 /* The bit that tells whether an element comes from the same lane or
21520 from the other lane is bit 4, so shift it up by 3 to the MSB position. */
21521 t5 = gen_reg_rtx (V4DImode);
21522 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21523 GEN_INT (3)));
21524 /* Clear MSB bits from the mask just in case it had them set. */
21525 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21526 /* After this t1 will have MSB set for elements from other lane. */
21527 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21528 /* Clear bits other than MSB. */
21529 emit_insn (gen_andv32qi3 (t1, t1, vt));
21530 /* Or in the lower bits from mask into t3. */
21531 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21532 /* And invert MSB bits in t1, so MSB is set for elements from the same
21533 lane. */
21534 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21535 /* Swap 128-bit lanes in t3. */
21536 t6 = gen_reg_rtx (V4DImode);
21537 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21538 const2_rtx, GEN_INT (3),
21539 const0_rtx, const1_rtx));
21540 /* And or in the lower bits from mask into t1. */
21541 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21542 if (one_operand_shuffle)
21544 /* Each of these shuffles will put 0s in places where
21545 element from the other 128-bit lane is needed, otherwise
21546 will shuffle in the requested value. */
21547 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21548 gen_lowpart (V32QImode, t6)));
21549 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21550 /* For t3 the 128-bit lanes are swapped again. */
21551 t7 = gen_reg_rtx (V4DImode);
21552 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21553 const2_rtx, GEN_INT (3),
21554 const0_rtx, const1_rtx));
21555 /* And oring both together leads to the result. */
21556 emit_insn (gen_iorv32qi3 (target, t1,
21557 gen_lowpart (V32QImode, t7)));
21558 if (target != operands[0])
21559 emit_move_insn (operands[0],
21560 gen_lowpart (GET_MODE (operands[0]), target));
21561 return;
21564 t4 = gen_reg_rtx (V32QImode);
21565 /* Similar to the one_operand_shuffle code above, just repeated
21566 twice, once for each operand. The merge_two: code below will
21567 merge the two results together. */
21568 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21569 gen_lowpart (V32QImode, t6)));
21570 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21571 gen_lowpart (V32QImode, t6)));
21572 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21573 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21574 t7 = gen_reg_rtx (V4DImode);
21575 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21576 const2_rtx, GEN_INT (3),
21577 const0_rtx, const1_rtx));
21578 t8 = gen_reg_rtx (V4DImode);
21579 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21580 const2_rtx, GEN_INT (3),
21581 const0_rtx, const1_rtx));
21582 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21583 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21584 t1 = t4;
21585 t2 = t3;
21586 goto merge_two;
21588 default:
21589 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21590 break;
21594 if (TARGET_XOP)
21596 /* The XOP VPPERM insn supports three inputs. By ignoring the
21597 one_operand_shuffle special case, we avoid creating another
21598 set of constant vectors in memory. */
21599 one_operand_shuffle = false;
21601 /* mask = mask & {2*w-1, ...} */
21602 vt = GEN_INT (2*w - 1);
21604 else
21606 /* mask = mask & {w-1, ...} */
21607 vt = GEN_INT (w - 1);
21610 for (i = 0; i < w; i++)
21611 vec[i] = vt;
21612 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21613 mask = expand_simple_binop (maskmode, AND, mask, vt,
21614 NULL_RTX, 0, OPTAB_DIRECT);
21616 /* For non-QImode operations, convert the word permutation control
21617 into a byte permutation control. */
21618 if (mode != V16QImode)
21620 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21621 GEN_INT (exact_log2 (e)),
21622 NULL_RTX, 0, OPTAB_DIRECT);
21624 /* Convert mask to vector of chars. */
21625 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21627 /* Replicate each of the input bytes into byte positions:
21628 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21629 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21630 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21631 for (i = 0; i < 16; ++i)
21632 vec[i] = GEN_INT (i/e * e);
21633 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21634 vt = validize_mem (force_const_mem (V16QImode, vt));
21635 if (TARGET_XOP)
21636 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21637 else
21638 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21640 /* Convert it into the byte positions by doing
21641 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21642 for (i = 0; i < 16; ++i)
21643 vec[i] = GEN_INT (i % e);
21644 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21645 vt = validize_mem (force_const_mem (V16QImode, vt));
21646 emit_insn (gen_addv16qi3 (mask, mask, vt));
21649 /* The actual shuffle operations all operate on V16QImode. */
21650 op0 = gen_lowpart (V16QImode, op0);
21651 op1 = gen_lowpart (V16QImode, op1);
21653 if (TARGET_XOP)
21655 if (GET_MODE (target) != V16QImode)
21656 target = gen_reg_rtx (V16QImode);
21657 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21658 if (target != operands[0])
21659 emit_move_insn (operands[0],
21660 gen_lowpart (GET_MODE (operands[0]), target));
21662 else if (one_operand_shuffle)
21664 if (GET_MODE (target) != V16QImode)
21665 target = gen_reg_rtx (V16QImode);
21666 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21667 if (target != operands[0])
21668 emit_move_insn (operands[0],
21669 gen_lowpart (GET_MODE (operands[0]), target));
21671 else
21673 rtx xops[6];
21674 bool ok;
21676 /* Shuffle the two input vectors independently. */
21677 t1 = gen_reg_rtx (V16QImode);
21678 t2 = gen_reg_rtx (V16QImode);
21679 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21680 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21682 merge_two:
21683 /* Then merge them together. The key is whether any given control
21684 element contained a bit set that indicates the second word. */
21685 mask = operands[3];
21686 vt = GEN_INT (w);
21687 if (maskmode == V2DImode && !TARGET_SSE4_1)
21689 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21690 more shuffle to convert the V2DI input mask into a V4SI
21691 input mask. At which point the masking that expand_int_vcond
21692 will work as desired. */
21693 rtx t3 = gen_reg_rtx (V4SImode);
21694 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21695 const0_rtx, const0_rtx,
21696 const2_rtx, const2_rtx));
21697 mask = t3;
21698 maskmode = V4SImode;
21699 e = w = 4;
21702 for (i = 0; i < w; i++)
21703 vec[i] = vt;
21704 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21705 vt = force_reg (maskmode, vt);
21706 mask = expand_simple_binop (maskmode, AND, mask, vt,
21707 NULL_RTX, 0, OPTAB_DIRECT);
21709 if (GET_MODE (target) != mode)
21710 target = gen_reg_rtx (mode);
21711 xops[0] = target;
21712 xops[1] = gen_lowpart (mode, t2);
21713 xops[2] = gen_lowpart (mode, t1);
21714 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21715 xops[4] = mask;
21716 xops[5] = vt;
21717 ok = ix86_expand_int_vcond (xops);
21718 gcc_assert (ok);
21719 if (target != operands[0])
21720 emit_move_insn (operands[0],
21721 gen_lowpart (GET_MODE (operands[0]), target));
21725 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
21726 true if we should do zero extension, else sign extension. HIGH_P is
21727 true if we want the N/2 high elements, else the low elements. */
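/* Illustration (not in the original source): unpacking the V8HImode
   vector { 0 1 2 3 4 5 6 7 } with HIGH_P false and UNSIGNED_P true
   produces the V4SImode vector { 0 1 2 3 }.  With SSE4.1 this is a
   single pmovzxwd/pmovsxwd; without it the source is interleaved with
   either zero (zero extension) or with a GT-against-zero mask whose
   elements are 0 or -1, which supplies the sign-extended high halves.  */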
21729 void
21730 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21732 enum machine_mode imode = GET_MODE (src);
21733 rtx tmp;
21735 if (TARGET_SSE4_1)
21737 rtx (*unpack)(rtx, rtx);
21738 rtx (*extract)(rtx, rtx) = NULL;
21739 enum machine_mode halfmode = BLKmode;
21741 switch (imode)
21743 case V32QImode:
21744 if (unsigned_p)
21745 unpack = gen_avx2_zero_extendv16qiv16hi2;
21746 else
21747 unpack = gen_avx2_sign_extendv16qiv16hi2;
21748 halfmode = V16QImode;
21749 extract
21750 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21751 break;
21752 case V32HImode:
21753 if (unsigned_p)
21754 unpack = gen_avx512f_zero_extendv16hiv16si2;
21755 else
21756 unpack = gen_avx512f_sign_extendv16hiv16si2;
21757 halfmode = V16HImode;
21758 extract
21759 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21760 break;
21761 case V16HImode:
21762 if (unsigned_p)
21763 unpack = gen_avx2_zero_extendv8hiv8si2;
21764 else
21765 unpack = gen_avx2_sign_extendv8hiv8si2;
21766 halfmode = V8HImode;
21767 extract
21768 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21769 break;
21770 case V16SImode:
21771 if (unsigned_p)
21772 unpack = gen_avx512f_zero_extendv8siv8di2;
21773 else
21774 unpack = gen_avx512f_sign_extendv8siv8di2;
21775 halfmode = V8SImode;
21776 extract
21777 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21778 break;
21779 case V8SImode:
21780 if (unsigned_p)
21781 unpack = gen_avx2_zero_extendv4siv4di2;
21782 else
21783 unpack = gen_avx2_sign_extendv4siv4di2;
21784 halfmode = V4SImode;
21785 extract
21786 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21787 break;
21788 case V16QImode:
21789 if (unsigned_p)
21790 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21791 else
21792 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21793 break;
21794 case V8HImode:
21795 if (unsigned_p)
21796 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21797 else
21798 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21799 break;
21800 case V4SImode:
21801 if (unsigned_p)
21802 unpack = gen_sse4_1_zero_extendv2siv2di2;
21803 else
21804 unpack = gen_sse4_1_sign_extendv2siv2di2;
21805 break;
21806 default:
21807 gcc_unreachable ();
21810 if (GET_MODE_SIZE (imode) >= 32)
21812 tmp = gen_reg_rtx (halfmode);
21813 emit_insn (extract (tmp, src));
21815 else if (high_p)
21817 /* Shift higher 8 bytes to lower 8 bytes. */
21818 tmp = gen_reg_rtx (V1TImode);
21819 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21820 GEN_INT (64)));
21821 tmp = gen_lowpart (imode, tmp);
21823 else
21824 tmp = src;
21826 emit_insn (unpack (dest, tmp));
21828 else
21830 rtx (*unpack)(rtx, rtx, rtx);
21832 switch (imode)
21834 case V16QImode:
21835 if (high_p)
21836 unpack = gen_vec_interleave_highv16qi;
21837 else
21838 unpack = gen_vec_interleave_lowv16qi;
21839 break;
21840 case V8HImode:
21841 if (high_p)
21842 unpack = gen_vec_interleave_highv8hi;
21843 else
21844 unpack = gen_vec_interleave_lowv8hi;
21845 break;
21846 case V4SImode:
21847 if (high_p)
21848 unpack = gen_vec_interleave_highv4si;
21849 else
21850 unpack = gen_vec_interleave_lowv4si;
21851 break;
21852 default:
21853 gcc_unreachable ();
21856 if (unsigned_p)
21857 tmp = force_reg (imode, CONST0_RTX (imode));
21858 else
21859 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21860 src, pc_rtx, pc_rtx);
21862 rtx tmp2 = gen_reg_rtx (imode);
21863 emit_insn (unpack (tmp2, src, tmp));
21864 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21868 /* Expand conditional increment or decrement using adc/sbb instructions.
21869 The default case using setcc followed by the conditional move can be
21870 done by generic code. */
21871 bool
21872 ix86_expand_int_addcc (rtx operands[])
21874 enum rtx_code code = GET_CODE (operands[1]);
21875 rtx flags;
21876 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21877 rtx compare_op;
21878 rtx val = const0_rtx;
21879 bool fpcmp = false;
21880 enum machine_mode mode;
21881 rtx op0 = XEXP (operands[1], 0);
21882 rtx op1 = XEXP (operands[1], 1);
21884 if (operands[3] != const1_rtx
21885 && operands[3] != constm1_rtx)
21886 return false;
21887 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21888 return false;
21889 code = GET_CODE (compare_op);
21891 flags = XEXP (compare_op, 0);
21893 if (GET_MODE (flags) == CCFPmode
21894 || GET_MODE (flags) == CCFPUmode)
21896 fpcmp = true;
21897 code = ix86_fp_compare_code_to_integer (code);
21900 if (code != LTU)
21902 val = constm1_rtx;
21903 if (fpcmp)
21904 PUT_CODE (compare_op,
21905 reverse_condition_maybe_unordered
21906 (GET_CODE (compare_op)));
21907 else
21908 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21911 mode = GET_MODE (operands[0]);
21913 /* Construct either adc or sbb insn. */
21914 if ((code == LTU) == (operands[3] == constm1_rtx))
21916 switch (mode)
21918 case QImode:
21919 insn = gen_subqi3_carry;
21920 break;
21921 case HImode:
21922 insn = gen_subhi3_carry;
21923 break;
21924 case SImode:
21925 insn = gen_subsi3_carry;
21926 break;
21927 case DImode:
21928 insn = gen_subdi3_carry;
21929 break;
21930 default:
21931 gcc_unreachable ();
21934 else
21936 switch (mode)
21938 case QImode:
21939 insn = gen_addqi3_carry;
21940 break;
21941 case HImode:
21942 insn = gen_addhi3_carry;
21943 break;
21944 case SImode:
21945 insn = gen_addsi3_carry;
21946 break;
21947 case DImode:
21948 insn = gen_adddi3_carry;
21949 break;
21950 default:
21951 gcc_unreachable ();
21954 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21956 return true;
21960 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21961 but works for floating point parameters and non-offsettable memories.
21962 For pushes, it returns just stack offsets; the values will be saved
21963 in the right order. At most four parts are generated. */
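/* Illustration (not in the original source): on a 32-bit target a DImode
   value yields two SImode parts, low word first, so 0x0000000100000002
   splits into parts[0] = 0x00000002 and parts[1] = 0x00000001; an XFmode
   value yields three parts.  For a push, every slot holds the same
   word_mode push operand.  */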
21965 static int
21966 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21968 int size;
21970 if (!TARGET_64BIT)
21971 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21972 else
21973 size = (GET_MODE_SIZE (mode) + 4) / 8;
21975 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21976 gcc_assert (size >= 2 && size <= 4);
21978 /* Optimize constant pool reference to immediates. This is used by fp
21979 moves, that force all constants to memory to allow combining. */
21980 if (MEM_P (operand) && MEM_READONLY_P (operand))
21982 rtx tmp = maybe_get_pool_constant (operand);
21983 if (tmp)
21984 operand = tmp;
21987 if (MEM_P (operand) && !offsettable_memref_p (operand))
21990 /* The only non-offsettable memories we handle are pushes. */
21990 int ok = push_operand (operand, VOIDmode);
21992 gcc_assert (ok);
21994 operand = copy_rtx (operand);
21995 PUT_MODE (operand, word_mode);
21996 parts[0] = parts[1] = parts[2] = parts[3] = operand;
21997 return size;
22000 if (GET_CODE (operand) == CONST_VECTOR)
22002 enum machine_mode imode = int_mode_for_mode (mode);
22003 /* Caution: if we looked through a constant pool memory above,
22004 the operand may actually have a different mode now. That's
22005 ok, since we want to pun this all the way back to an integer. */
22006 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22007 gcc_assert (operand != NULL);
22008 mode = imode;
22011 if (!TARGET_64BIT)
22013 if (mode == DImode)
22014 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22015 else
22017 int i;
22019 if (REG_P (operand))
22021 gcc_assert (reload_completed);
22022 for (i = 0; i < size; i++)
22023 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22025 else if (offsettable_memref_p (operand))
22027 operand = adjust_address (operand, SImode, 0);
22028 parts[0] = operand;
22029 for (i = 1; i < size; i++)
22030 parts[i] = adjust_address (operand, SImode, 4 * i);
22032 else if (GET_CODE (operand) == CONST_DOUBLE)
22034 REAL_VALUE_TYPE r;
22035 long l[4];
22037 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22038 switch (mode)
22040 case TFmode:
22041 real_to_target (l, &r, mode);
22042 parts[3] = gen_int_mode (l[3], SImode);
22043 parts[2] = gen_int_mode (l[2], SImode);
22044 break;
22045 case XFmode:
22046 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22047 long double may not be 80-bit. */
22048 real_to_target (l, &r, mode);
22049 parts[2] = gen_int_mode (l[2], SImode);
22050 break;
22051 case DFmode:
22052 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22053 break;
22054 default:
22055 gcc_unreachable ();
22057 parts[1] = gen_int_mode (l[1], SImode);
22058 parts[0] = gen_int_mode (l[0], SImode);
22060 else
22061 gcc_unreachable ();
22064 else
22066 if (mode == TImode)
22067 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22068 if (mode == XFmode || mode == TFmode)
22070 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22071 if (REG_P (operand))
22073 gcc_assert (reload_completed);
22074 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22075 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22077 else if (offsettable_memref_p (operand))
22079 operand = adjust_address (operand, DImode, 0);
22080 parts[0] = operand;
22081 parts[1] = adjust_address (operand, upper_mode, 8);
22083 else if (GET_CODE (operand) == CONST_DOUBLE)
22085 REAL_VALUE_TYPE r;
22086 long l[4];
22088 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22089 real_to_target (l, &r, mode);
22091 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22092 if (HOST_BITS_PER_WIDE_INT >= 64)
22093 parts[0]
22094 = gen_int_mode
22095 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22096 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22097 DImode);
22098 else
22099 parts[0] = immed_double_const (l[0], l[1], DImode);
22101 if (upper_mode == SImode)
22102 parts[1] = gen_int_mode (l[2], SImode);
22103 else if (HOST_BITS_PER_WIDE_INT >= 64)
22104 parts[1]
22105 = gen_int_mode
22106 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22107 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22108 DImode);
22109 else
22110 parts[1] = immed_double_const (l[2], l[3], DImode);
22112 else
22113 gcc_unreachable ();
22117 return size;
22120 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22121 Return false when normal moves are needed; true when all required
22122 insns have been emitted. Operands 2-4 contain the input values
22123 in the correct order; operands 5-7 contain the output values. */
22125 void
22126 ix86_split_long_move (rtx operands[])
22128 rtx part[2][4];
22129 int nparts, i, j;
22130 int push = 0;
22131 int collisions = 0;
22132 enum machine_mode mode = GET_MODE (operands[0]);
22133 bool collisionparts[4];
22135 /* The DFmode expanders may ask us to move a double.
22136 For a 64bit target this is a single move. By hiding the fact
22137 here we simplify i386.md splitters. */
22138 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22140 /* Optimize constant pool reference to immediates. This is used by
22141 fp moves, that force all constants to memory to allow combining. */
22143 if (MEM_P (operands[1])
22144 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22145 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22146 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22147 if (push_operand (operands[0], VOIDmode))
22149 operands[0] = copy_rtx (operands[0]);
22150 PUT_MODE (operands[0], word_mode);
22152 else
22153 operands[0] = gen_lowpart (DImode, operands[0]);
22154 operands[1] = gen_lowpart (DImode, operands[1]);
22155 emit_move_insn (operands[0], operands[1]);
22156 return;
22159 /* The only non-offsettable memory we handle is push. */
22160 if (push_operand (operands[0], VOIDmode))
22161 push = 1;
22162 else
22163 gcc_assert (!MEM_P (operands[0])
22164 || offsettable_memref_p (operands[0]));
22166 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22167 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22169 /* When emitting a push, take care of source operands on the stack. */
22170 if (push && MEM_P (operands[1])
22171 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22173 rtx src_base = XEXP (part[1][nparts - 1], 0);
22175 /* Compensate for the stack decrement by 4. */
22176 if (!TARGET_64BIT && nparts == 3
22177 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22178 src_base = plus_constant (Pmode, src_base, 4);
22180 /* src_base refers to the stack pointer and is
22181 automatically decreased by emitted push. */
22182 for (i = 0; i < nparts; i++)
22183 part[1][i] = change_address (part[1][i],
22184 GET_MODE (part[1][i]), src_base);
22187 /* We need to do the copy in the right order in case an address register
22188 of the source overlaps the destination. */
22189 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22191 rtx tmp;
22193 for (i = 0; i < nparts; i++)
22195 collisionparts[i]
22196 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22197 if (collisionparts[i])
22198 collisions++;
22201 /* Collision in the middle part can be handled by reordering. */
22202 if (collisions == 1 && nparts == 3 && collisionparts [1])
22204 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22205 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22207 else if (collisions == 1
22208 && nparts == 4
22209 && (collisionparts [1] || collisionparts [2]))
22211 if (collisionparts [1])
22213 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22214 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22216 else
22218 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22219 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22223 /* If there are more collisions, we can't handle it by reordering.
22224 Do an lea to the last part and use only one colliding move. */
22225 else if (collisions > 1)
22227 rtx base;
22229 collisions = 1;
22231 base = part[0][nparts - 1];
22233 /* Handle the case when the last part isn't valid for lea.
22234 Happens in 64-bit mode storing the 12-byte XFmode. */
22235 if (GET_MODE (base) != Pmode)
22236 base = gen_rtx_REG (Pmode, REGNO (base));
22238 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22239 part[1][0] = replace_equiv_address (part[1][0], base);
22240 for (i = 1; i < nparts; i++)
22242 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22243 part[1][i] = replace_equiv_address (part[1][i], tmp);
22248 if (push)
22250 if (!TARGET_64BIT)
22252 if (nparts == 3)
22254 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22255 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22256 stack_pointer_rtx, GEN_INT (-4)));
22257 emit_move_insn (part[0][2], part[1][2]);
22259 else if (nparts == 4)
22261 emit_move_insn (part[0][3], part[1][3]);
22262 emit_move_insn (part[0][2], part[1][2]);
22265 else
22267 /* In 64bit mode we don't have a 32bit push available. In case this is
22268 a register, that is OK - we will just use the larger counterpart. We
22269 also retype memory - this comes from an attempt to avoid a REX prefix
22270 when moving the second half of a TFmode value. */
22271 if (GET_MODE (part[1][1]) == SImode)
22273 switch (GET_CODE (part[1][1]))
22275 case MEM:
22276 part[1][1] = adjust_address (part[1][1], DImode, 0);
22277 break;
22279 case REG:
22280 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22281 break;
22283 default:
22284 gcc_unreachable ();
22287 if (GET_MODE (part[1][0]) == SImode)
22288 part[1][0] = part[1][1];
22291 emit_move_insn (part[0][1], part[1][1]);
22292 emit_move_insn (part[0][0], part[1][0]);
22293 return;
22296 /* Choose correct order to not overwrite the source before it is copied. */
22297 if ((REG_P (part[0][0])
22298 && REG_P (part[1][1])
22299 && (REGNO (part[0][0]) == REGNO (part[1][1])
22300 || (nparts == 3
22301 && REGNO (part[0][0]) == REGNO (part[1][2]))
22302 || (nparts == 4
22303 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22304 || (collisions > 0
22305 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22307 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22309 operands[2 + i] = part[0][j];
22310 operands[6 + i] = part[1][j];
22313 else
22315 for (i = 0; i < nparts; i++)
22317 operands[2 + i] = part[0][i];
22318 operands[6 + i] = part[1][i];
22322 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22323 if (optimize_insn_for_size_p ())
22325 for (j = 0; j < nparts - 1; j++)
22326 if (CONST_INT_P (operands[6 + j])
22327 && operands[6 + j] != const0_rtx
22328 && REG_P (operands[2 + j]))
22329 for (i = j; i < nparts - 1; i++)
22330 if (CONST_INT_P (operands[7 + i])
22331 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22332 operands[7 + i] = operands[2 + j];
22335 for (i = 0; i < nparts; i++)
22336 emit_move_insn (operands[2 + i], operands[6 + i]);
22338 return;
22341 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22342 left shift by a constant, either using a single shift or
22343 a sequence of add instructions. */
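/* For instance (illustration only): a shift by 1 is always emitted as a
   single "add reg, reg", since x + x == x << 1; a larger count uses
   repeated adds only while COUNT * add cost stays within the cost of one
   constant shift and we are not optimizing for size.  */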
22345 static void
22346 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22348 rtx (*insn)(rtx, rtx, rtx);
22350 if (count == 1
22351 || (count * ix86_cost->add <= ix86_cost->shift_const
22352 && !optimize_insn_for_size_p ()))
22354 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22355 while (count-- > 0)
22356 emit_insn (insn (operand, operand, operand));
22358 else
22360 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22361 emit_insn (insn (operand, operand, GEN_INT (count)));
22365 void
22366 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22368 rtx (*gen_ashl3)(rtx, rtx, rtx);
22369 rtx (*gen_shld)(rtx, rtx, rtx);
22370 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22372 rtx low[2], high[2];
22373 int count;
22375 if (CONST_INT_P (operands[2]))
22377 split_double_mode (mode, operands, 2, low, high);
22378 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22380 if (count >= half_width)
22382 emit_move_insn (high[0], low[1]);
22383 emit_move_insn (low[0], const0_rtx);
22385 if (count > half_width)
22386 ix86_expand_ashl_const (high[0], count - half_width, mode);
22388 else
22390 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22392 if (!rtx_equal_p (operands[0], operands[1]))
22393 emit_move_insn (operands[0], operands[1]);
22395 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22396 ix86_expand_ashl_const (low[0], count, mode);
22398 return;
22401 split_double_mode (mode, operands, 1, low, high);
22403 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22405 if (operands[1] == const1_rtx)
22407 /* Assuming we've chosen QImode capable registers, then 1 << N
22408 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22409 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22411 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22413 ix86_expand_clear (low[0]);
22414 ix86_expand_clear (high[0]);
22415 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22417 d = gen_lowpart (QImode, low[0]);
22418 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22419 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22420 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22422 d = gen_lowpart (QImode, high[0]);
22423 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22424 s = gen_rtx_NE (QImode, flags, const0_rtx);
22425 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22428 /* Otherwise, we can get the same results by manually performing
22429 a bit extract operation on bit 5/6, and then performing the two
22430 shifts. The two methods of getting 0/1 into low/high are exactly
22431 the same size. Avoiding the shift in the bit extract case helps
22432 pentium4 a bit; no one else seems to care much either way. */
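/* A concrete example (not in the original source), for DImode on a
   32-bit target: bit 5 of the shift count says whether the shift is by
   32 or more.  For 1 << 40 we get high = (40 >> 5) & 1 = 1 and
   low = high ^ 1 = 0; shifting both halves by the count (masked to
   5 bits by the hardware, i.e. by 8) leaves low = 0 and high = 1 << 8,
   which is exactly 1 << 40.  */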
22433 else
22435 enum machine_mode half_mode;
22436 rtx (*gen_lshr3)(rtx, rtx, rtx);
22437 rtx (*gen_and3)(rtx, rtx, rtx);
22438 rtx (*gen_xor3)(rtx, rtx, rtx);
22439 HOST_WIDE_INT bits;
22440 rtx x;
22442 if (mode == DImode)
22444 half_mode = SImode;
22445 gen_lshr3 = gen_lshrsi3;
22446 gen_and3 = gen_andsi3;
22447 gen_xor3 = gen_xorsi3;
22448 bits = 5;
22450 else
22452 half_mode = DImode;
22453 gen_lshr3 = gen_lshrdi3;
22454 gen_and3 = gen_anddi3;
22455 gen_xor3 = gen_xordi3;
22456 bits = 6;
22459 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22460 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22461 else
22462 x = gen_lowpart (half_mode, operands[2]);
22463 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22465 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22466 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22467 emit_move_insn (low[0], high[0]);
22468 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22471 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22472 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22473 return;
22476 if (operands[1] == constm1_rtx)
22478 /* For -1 << N, we can avoid the shld instruction, because we
22479 know that we're shifting 0...31/63 ones into a -1. */
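/* E.g. (illustration only) -1 << 8 on a 32-bit target: the high word
   can simply stay -1, since an shld would only shift ones into an
   all-ones value, and the plain shift below turns the low word into
   0xffffff00.  */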
22480 emit_move_insn (low[0], constm1_rtx);
22481 if (optimize_insn_for_size_p ())
22482 emit_move_insn (high[0], low[0]);
22483 else
22484 emit_move_insn (high[0], constm1_rtx);
22486 else
22488 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22490 if (!rtx_equal_p (operands[0], operands[1]))
22491 emit_move_insn (operands[0], operands[1]);
22493 split_double_mode (mode, operands, 1, low, high);
22494 emit_insn (gen_shld (high[0], low[0], operands[2]));
22497 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22499 if (TARGET_CMOVE && scratch)
22501 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22502 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22504 ix86_expand_clear (scratch);
22505 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22507 else
22509 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22510 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22512 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22516 void
22517 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22519 rtx (*gen_ashr3)(rtx, rtx, rtx)
22520 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22521 rtx (*gen_shrd)(rtx, rtx, rtx);
22522 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22524 rtx low[2], high[2];
22525 int count;
22527 if (CONST_INT_P (operands[2]))
22529 split_double_mode (mode, operands, 2, low, high);
22530 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22532 if (count == GET_MODE_BITSIZE (mode) - 1)
22534 emit_move_insn (high[0], high[1]);
22535 emit_insn (gen_ashr3 (high[0], high[0],
22536 GEN_INT (half_width - 1)));
22537 emit_move_insn (low[0], high[0]);
22540 else if (count >= half_width)
22542 emit_move_insn (low[0], high[1]);
22543 emit_move_insn (high[0], low[0]);
22544 emit_insn (gen_ashr3 (high[0], high[0],
22545 GEN_INT (half_width - 1)));
22547 if (count > half_width)
22548 emit_insn (gen_ashr3 (low[0], low[0],
22549 GEN_INT (count - half_width)));
22551 else
22553 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22555 if (!rtx_equal_p (operands[0], operands[1]))
22556 emit_move_insn (operands[0], operands[1]);
22558 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22559 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22562 else
22564 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22566 if (!rtx_equal_p (operands[0], operands[1]))
22567 emit_move_insn (operands[0], operands[1]);
22569 split_double_mode (mode, operands, 1, low, high);
22571 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22572 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22574 if (TARGET_CMOVE && scratch)
22576 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22577 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22579 emit_move_insn (scratch, high[0]);
22580 emit_insn (gen_ashr3 (scratch, scratch,
22581 GEN_INT (half_width - 1)));
22582 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22583 scratch));
22585 else
22587 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22588 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22590 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22595 void
22596 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22598 rtx (*gen_lshr3)(rtx, rtx, rtx)
22599 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22600 rtx (*gen_shrd)(rtx, rtx, rtx);
22601 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22603 rtx low[2], high[2];
22604 int count;
22606 if (CONST_INT_P (operands[2]))
22608 split_double_mode (mode, operands, 2, low, high);
22609 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22611 if (count >= half_width)
22613 emit_move_insn (low[0], high[1]);
22614 ix86_expand_clear (high[0]);
22616 if (count > half_width)
22617 emit_insn (gen_lshr3 (low[0], low[0],
22618 GEN_INT (count - half_width)));
22620 else
22622 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22624 if (!rtx_equal_p (operands[0], operands[1]))
22625 emit_move_insn (operands[0], operands[1]);
22627 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22628 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22631 else
22633 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22635 if (!rtx_equal_p (operands[0], operands[1]))
22636 emit_move_insn (operands[0], operands[1]);
22638 split_double_mode (mode, operands, 1, low, high);
22640 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22641 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22643 if (TARGET_CMOVE && scratch)
22645 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22646 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22648 ix86_expand_clear (scratch);
22649 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22650 scratch));
22652 else
22654 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22655 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22657 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22662 /* Predict just emitted jump instruction to be taken with probability PROB. */
22663 static void
22664 predict_jump (int prob)
22666 rtx insn = get_last_insn ();
22667 gcc_assert (JUMP_P (insn));
22668 add_int_reg_note (insn, REG_BR_PROB, prob);
22671 /* Helper function for the string operations below. Test VARIABLE whether
22672 it is aligned to VALUE bytes. If true, jump to the label. */
22673 static rtx
22674 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22676 rtx label = gen_label_rtx ();
22677 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22678 if (GET_MODE (variable) == DImode)
22679 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22680 else
22681 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22682 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22683 1, label);
22684 if (epilogue)
22685 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22686 else
22687 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22688 return label;
22691 /* Adjust COUNTER by the VALUE. */
22692 static void
22693 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22695 rtx (*gen_add)(rtx, rtx, rtx)
22696 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22698 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22701 /* Zero extend possibly SImode EXP to Pmode register. */
22703 ix86_zero_extend_to_Pmode (rtx exp)
22705 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22708 /* Divide COUNTREG by SCALE. */
22709 static rtx
22710 scale_counter (rtx countreg, int scale)
22712 rtx sc;
22714 if (scale == 1)
22715 return countreg;
22716 if (CONST_INT_P (countreg))
22717 return GEN_INT (INTVAL (countreg) / scale);
22718 gcc_assert (REG_P (countreg));
22720 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22721 GEN_INT (exact_log2 (scale)),
22722 NULL, 1, OPTAB_DIRECT);
22723 return sc;
22726 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22727 DImode for constant loop counts. */
22729 static enum machine_mode
22730 counter_mode (rtx count_exp)
22732 if (GET_MODE (count_exp) != VOIDmode)
22733 return GET_MODE (count_exp);
22734 if (!CONST_INT_P (count_exp))
22735 return Pmode;
22736 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22737 return DImode;
22738 return SImode;
22741 /* Copy the address to a Pmode register. This is used for x32 to
22742 truncate DImode TLS address to a SImode register. */
22744 static rtx
22745 ix86_copy_addr_to_reg (rtx addr)
22747 if (GET_MODE (addr) == Pmode)
22748 return copy_addr_to_reg (addr);
22749 else
22751 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22752 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22756 /* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to
22757 by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; overall size is COUNT
22758 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
22759 memory by VALUE (supposed to be in MODE).
22761 The size is rounded down to whole number of chunk size moved at once.
22762 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
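/* A rough C model of the copy loop this emits (illustration only; the
   function below is hypothetical, assumes a MODE of 4 bytes with UNROLL
   of 2, and is kept out of the build by the #if 0).  */
#if 0
static void
copy_loop_model (char *dest, const char *src, unsigned long count)
{
  unsigned long size = count & ~7UL;	/* whole 8-byte chunks only */
  unsigned long iter;

  for (iter = 0; iter < size; iter += 8)
    {
      ((int *) (dest + iter))[0] = ((const int *) (src + iter))[0];
      ((int *) (dest + iter))[1] = ((const int *) (src + iter))[1];
    }
  /* The real code then advances DESTPTR and SRCPTR by ITER, as done
     at the end of the function below.  */
}
#endif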
22765 static void
22766 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22767 rtx destptr, rtx srcptr, rtx value,
22768 rtx count, enum machine_mode mode, int unroll,
22769 int expected_size, bool issetmem)
22771 rtx out_label, top_label, iter, tmp;
22772 enum machine_mode iter_mode = counter_mode (count);
22773 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22774 rtx piece_size = GEN_INT (piece_size_n);
22775 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22776 rtx size;
22777 int i;
22779 top_label = gen_label_rtx ();
22780 out_label = gen_label_rtx ();
22781 iter = gen_reg_rtx (iter_mode);
22783 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22784 NULL, 1, OPTAB_DIRECT);
22785 /* Those two should combine. */
22786 if (piece_size == const1_rtx)
22788 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22789 true, out_label);
22790 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22792 emit_move_insn (iter, const0_rtx);
22794 emit_label (top_label);
22796 tmp = convert_modes (Pmode, iter_mode, iter, true);
22798 /* This assert could be relaxed - in that case we'd need to compute the
22799 smallest power of two containing PIECE_SIZE_N and pass it to
22800 offset_address. */
22801 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22802 destmem = offset_address (destmem, tmp, piece_size_n);
22803 destmem = adjust_address (destmem, mode, 0);
22805 if (!issetmem)
22807 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22808 srcmem = adjust_address (srcmem, mode, 0);
22810 /* When unrolling for chips that reorder memory reads and writes,
22811 we can save registers by using a single temporary.
22812 Also, using 4 temporaries is overkill in 32-bit mode. */
22813 if (!TARGET_64BIT && 0)
22815 for (i = 0; i < unroll; i++)
22817 if (i)
22819 destmem =
22820 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22821 srcmem =
22822 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22824 emit_move_insn (destmem, srcmem);
22827 else
22829 rtx tmpreg[4];
22830 gcc_assert (unroll <= 4);
22831 for (i = 0; i < unroll; i++)
22833 tmpreg[i] = gen_reg_rtx (mode);
22834 if (i)
22836 srcmem =
22837 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22839 emit_move_insn (tmpreg[i], srcmem);
22841 for (i = 0; i < unroll; i++)
22843 if (i)
22845 destmem =
22846 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22848 emit_move_insn (destmem, tmpreg[i]);
22852 else
22853 for (i = 0; i < unroll; i++)
22855 if (i)
22856 destmem =
22857 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22858 emit_move_insn (destmem, value);
22861 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22862 true, OPTAB_LIB_WIDEN);
22863 if (tmp != iter)
22864 emit_move_insn (iter, tmp);
22866 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22867 true, top_label);
22868 if (expected_size != -1)
22870 expected_size /= GET_MODE_SIZE (mode) * unroll;
22871 if (expected_size == 0)
22872 predict_jump (0);
22873 else if (expected_size > REG_BR_PROB_BASE)
22874 predict_jump (REG_BR_PROB_BASE - 1);
22875 else
22876 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22878 else
22879 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22880 iter = ix86_zero_extend_to_Pmode (iter);
22881 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22882 true, OPTAB_LIB_WIDEN);
22883 if (tmp != destptr)
22884 emit_move_insn (destptr, tmp);
22885 if (!issetmem)
22887 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22888 true, OPTAB_LIB_WIDEN);
22889 if (tmp != srcptr)
22890 emit_move_insn (srcptr, tmp);
22892 emit_label (out_label);
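/* Rough shape of the emitted code (illustrative sketch, assuming a DImode copy
   unrolled twice, i.e. 16-byte chunks):

     size = count & ~15;
     (when the chunk size is 1: if (size == 0) goto out;)
     iter = 0;
   top:
     load two DImode temporaries from src + iter, store them to dest + iter;
     iter += 16;
     if (iter < size) goto top;
     destptr += iter;  srcptr += iter;   (srcptr only for the memcpy case)
   out:

   The remaining count % 16 bytes are left for the caller's epilogue.  */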
22895 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22896 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22897 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22898 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22899 ORIG_VALUE is the original value passed to memset to fill the memory with.
22900 Other arguments have the same meaning as for the previous function. */
22902 static void
22903 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22904 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22905 rtx count,
22906 enum machine_mode mode, bool issetmem)
22908 rtx destexp;
22909 rtx srcexp;
22910 rtx countreg;
22911 HOST_WIDE_INT rounded_count;
22913 /* If possible, it is shorter to use rep movs.
22914 TODO: Maybe it is better to move this logic to decide_alg. */
22915 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22916 && (!issetmem || orig_value == const0_rtx))
22917 mode = SImode;
22919 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22920 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22922 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22923 GET_MODE_SIZE (mode)));
22924 if (mode != QImode)
22926 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22927 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22928 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22930 else
22931 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22932 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22934 rounded_count = (INTVAL (count)
22935 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22936 destmem = shallow_copy_rtx (destmem);
22937 set_mem_size (destmem, rounded_count);
22939 else if (MEM_SIZE_KNOWN_P (destmem))
22940 clear_mem_size (destmem);
22942 if (issetmem)
22944 value = force_reg (mode, gen_lowpart (mode, value));
22945 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22947 else
22949 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22950 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22951 if (mode != QImode)
22953 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22954 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22955 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22957 else
22958 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22959 if (CONST_INT_P (count))
22961 rounded_count = (INTVAL (count)
22962 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22963 srcmem = shallow_copy_rtx (srcmem);
22964 set_mem_size (srcmem, rounded_count);
22966 else
22968 if (MEM_SIZE_KNOWN_P (srcmem))
22969 clear_mem_size (srcmem);
22971 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22972 destexp, srcexp));
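/* Illustrative example (added comment): for a memcpy expanded with MODE ==
   SImode and a count in a register, COUNTREG ends up holding count / 4 and the
   emitted pattern corresponds to a "rep movsl"; the memset variant with a
   promoted value similarly corresponds to "rep stosl".  DESTEXP and SRCEXP
   describe the final pointer values for the insn pattern.  */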
22976 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22977 DESTMEM.
22978 SRCMEM is passed by pointer so it can be updated on return.
22979 The return value is the updated DESTMEM. */
22980 static rtx
22981 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22982 HOST_WIDE_INT size_to_move)
22984 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22985 enum insn_code code;
22986 enum machine_mode move_mode;
22987 int piece_size, i;
22989 /* Find the widest mode in which we could perform moves.
22990 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
22991 it until a move of such size is supported. */
22992 piece_size = 1 << floor_log2 (size_to_move);
22993 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22994 code = optab_handler (mov_optab, move_mode);
22995 while (code == CODE_FOR_nothing && piece_size > 1)
22997 piece_size >>= 1;
22998 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22999 code = optab_handler (mov_optab, move_mode);
23002 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23003 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23004 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23006 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23007 move_mode = mode_for_vector (word_mode, nunits);
23008 code = optab_handler (mov_optab, move_mode);
23009 if (code == CODE_FOR_nothing)
23011 move_mode = word_mode;
23012 piece_size = GET_MODE_SIZE (move_mode);
23013 code = optab_handler (mov_optab, move_mode);
23016 gcc_assert (code != CODE_FOR_nothing);
23018 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23019 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23021 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23022 gcc_assert (size_to_move % piece_size == 0);
23023 adjust = GEN_INT (piece_size);
23024 for (i = 0; i < size_to_move; i += piece_size)
23026 /* We move from memory to memory, so we'll need to do it via
23027 a temporary register. */
23028 tempreg = gen_reg_rtx (move_mode);
23029 emit_insn (GEN_FCN (code) (tempreg, src));
23030 emit_insn (GEN_FCN (code) (dst, tempreg));
23032 emit_move_insn (destptr,
23033 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23034 emit_move_insn (srcptr,
23035 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23037 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23038 piece_size);
23039 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23040 piece_size);
23043 /* Update DST and SRC rtx. */
23044 *srcmem = src;
23045 return dst;
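/* Worked example (illustrative): emit_memmov (dst, &src, destptr, srcptr, 8)
   on a 64-bit target picks DImode (piece_size == 8), emits one DImode load
   into a temporary and one DImode store, and advances both DESTPTR and SRCPTR
   by 8; larger power-of-two sizes may instead use a vector mode when a vector
   move of that width is available.  */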
23048 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23049 static void
23050 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23051 rtx destptr, rtx srcptr, rtx count, int max_size)
23053 rtx src, dest;
23054 if (CONST_INT_P (count))
23056 HOST_WIDE_INT countval = INTVAL (count);
23057 HOST_WIDE_INT epilogue_size = countval % max_size;
23058 int i;
23060 /* For now MAX_SIZE should be a power of 2. This assert could be
23061 relaxed, but it'll require a bit more complicated epilogue
23062 expanding. */
23063 gcc_assert ((max_size & (max_size - 1)) == 0);
23064 for (i = max_size; i >= 1; i >>= 1)
23066 if (epilogue_size & i)
23067 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23069 return;
23071 if (max_size > 8)
23073 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23074 count, 1, OPTAB_DIRECT);
23075 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23076 count, QImode, 1, 4, false);
23077 return;
23080 /* When single-instruction string operations are available, we can cheaply
23081 advance the dest and src pointers.  Otherwise we save code size by maintaining
23082 an offset (zero is readily available from the preceding rep operation) and using x86 addressing modes.
23084 if (TARGET_SINGLE_STRINGOP)
23086 if (max_size > 4)
23088 rtx label = ix86_expand_aligntest (count, 4, true);
23089 src = change_address (srcmem, SImode, srcptr);
23090 dest = change_address (destmem, SImode, destptr);
23091 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23092 emit_label (label);
23093 LABEL_NUSES (label) = 1;
23095 if (max_size > 2)
23097 rtx label = ix86_expand_aligntest (count, 2, true);
23098 src = change_address (srcmem, HImode, srcptr);
23099 dest = change_address (destmem, HImode, destptr);
23100 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23101 emit_label (label);
23102 LABEL_NUSES (label) = 1;
23104 if (max_size > 1)
23106 rtx label = ix86_expand_aligntest (count, 1, true);
23107 src = change_address (srcmem, QImode, srcptr);
23108 dest = change_address (destmem, QImode, destptr);
23109 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23110 emit_label (label);
23111 LABEL_NUSES (label) = 1;
23114 else
23116 rtx offset = force_reg (Pmode, const0_rtx);
23117 rtx tmp;
23119 if (max_size > 4)
23121 rtx label = ix86_expand_aligntest (count, 4, true);
23122 src = change_address (srcmem, SImode, srcptr);
23123 dest = change_address (destmem, SImode, destptr);
23124 emit_move_insn (dest, src);
23125 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23126 true, OPTAB_LIB_WIDEN);
23127 if (tmp != offset)
23128 emit_move_insn (offset, tmp);
23129 emit_label (label);
23130 LABEL_NUSES (label) = 1;
23132 if (max_size > 2)
23134 rtx label = ix86_expand_aligntest (count, 2, true);
23135 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23136 src = change_address (srcmem, HImode, tmp);
23137 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23138 dest = change_address (destmem, HImode, tmp);
23139 emit_move_insn (dest, src);
23140 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23141 true, OPTAB_LIB_WIDEN);
23142 if (tmp != offset)
23143 emit_move_insn (offset, tmp);
23144 emit_label (label);
23145 LABEL_NUSES (label) = 1;
23147 if (max_size > 1)
23149 rtx label = ix86_expand_aligntest (count, 1, true);
23150 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23151 src = change_address (srcmem, QImode, tmp);
23152 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23153 dest = change_address (destmem, QImode, tmp);
23154 emit_move_insn (dest, src);
23155 emit_label (label);
23156 LABEL_NUSES (label) = 1;
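/* Worked example (illustrative): with a constant count of 23 and MAX_SIZE of
   16, the epilogue size is 23 % 16 == 7, so the constant path above emits
   three emit_memmov calls of 4, 2 and 1 bytes.  For a non-constant count the
   variable paths emit conditional 4/2/1-byte moves guarded by
   ix86_expand_aligntest on the count.  */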
23161 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23162 with the value PROMOTED_VAL.
23163 Unlike emit_memmov there is no source operand to update.
23164 The return value is the updated DESTMEM. */
23165 static rtx
23166 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23167 HOST_WIDE_INT size_to_move)
23169 rtx dst = destmem, adjust;
23170 enum insn_code code;
23171 enum machine_mode move_mode;
23172 int piece_size, i;
23174 /* Find the widest mode in which we could perform stores.
23175 Start with the mode of PROMOTED_VAL and shrink it if SIZE_TO_MOVE is
23176 smaller than that mode's size. */
23177 move_mode = GET_MODE (promoted_val);
23178 if (move_mode == VOIDmode)
23179 move_mode = QImode;
23180 if (size_to_move < GET_MODE_SIZE (move_mode))
23182 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23183 promoted_val = gen_lowpart (move_mode, promoted_val);
23185 piece_size = GET_MODE_SIZE (move_mode);
23186 code = optab_handler (mov_optab, move_mode);
23187 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23189 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23191 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23192 gcc_assert (size_to_move % piece_size == 0);
23193 adjust = GEN_INT (piece_size);
23194 for (i = 0; i < size_to_move; i += piece_size)
23196 if (piece_size <= GET_MODE_SIZE (word_mode))
23198 emit_insn (gen_strset (destptr, dst, promoted_val));
23199 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23200 piece_size);
23201 continue;
23204 emit_insn (GEN_FCN (code) (dst, promoted_val));
23206 emit_move_insn (destptr,
23207 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23209 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23210 piece_size);
23213 /* Update DST rtx. */
23214 return dst;
23216 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23217 static void
23218 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23219 rtx count, int max_size)
23221 count =
23222 expand_simple_binop (counter_mode (count), AND, count,
23223 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23224 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23225 gen_lowpart (QImode, value), count, QImode,
23226 1, max_size / 2, true);
23229 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23230 static void
23231 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23232 rtx count, int max_size)
23234 rtx dest;
23236 if (CONST_INT_P (count))
23238 HOST_WIDE_INT countval = INTVAL (count);
23239 HOST_WIDE_INT epilogue_size = countval % max_size;
23240 int i;
23242 /* For now MAX_SIZE should be a power of 2. This assert could be
23243 relaxed, but it'll require a bit more complicated epilogue
23244 expanding. */
23245 gcc_assert ((max_size & (max_size - 1)) == 0);
23246 for (i = max_size; i >= 1; i >>= 1)
23248 if (epilogue_size & i)
23250 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23251 destmem = emit_memset (destmem, destptr, vec_value, i);
23252 else
23253 destmem = emit_memset (destmem, destptr, value, i);
23256 return;
23258 if (max_size > 32)
23260 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23261 return;
23263 if (max_size > 16)
23265 rtx label = ix86_expand_aligntest (count, 16, true);
23266 if (TARGET_64BIT)
23268 dest = change_address (destmem, DImode, destptr);
23269 emit_insn (gen_strset (destptr, dest, value));
23270 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23271 emit_insn (gen_strset (destptr, dest, value));
23273 else
23275 dest = change_address (destmem, SImode, destptr);
23276 emit_insn (gen_strset (destptr, dest, value));
23277 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23278 emit_insn (gen_strset (destptr, dest, value));
23279 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23280 emit_insn (gen_strset (destptr, dest, value));
23281 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23282 emit_insn (gen_strset (destptr, dest, value));
23284 emit_label (label);
23285 LABEL_NUSES (label) = 1;
23287 if (max_size > 8)
23289 rtx label = ix86_expand_aligntest (count, 8, true);
23290 if (TARGET_64BIT)
23292 dest = change_address (destmem, DImode, destptr);
23293 emit_insn (gen_strset (destptr, dest, value));
23295 else
23297 dest = change_address (destmem, SImode, destptr);
23298 emit_insn (gen_strset (destptr, dest, value));
23299 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23300 emit_insn (gen_strset (destptr, dest, value));
23302 emit_label (label);
23303 LABEL_NUSES (label) = 1;
23305 if (max_size > 4)
23307 rtx label = ix86_expand_aligntest (count, 4, true);
23308 dest = change_address (destmem, SImode, destptr);
23309 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23310 emit_label (label);
23311 LABEL_NUSES (label) = 1;
23313 if (max_size > 2)
23315 rtx label = ix86_expand_aligntest (count, 2, true);
23316 dest = change_address (destmem, HImode, destptr);
23317 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23318 emit_label (label);
23319 LABEL_NUSES (label) = 1;
23321 if (max_size > 1)
23323 rtx label = ix86_expand_aligntest (count, 1, true);
23324 dest = change_address (destmem, QImode, destptr);
23325 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23326 emit_label (label);
23327 LABEL_NUSES (label) = 1;
23331 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
23332 enough bytes to DESTMEM, to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
23333 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23334 ignored.
23335 Return value is the updated DESTMEM. */
23336 static rtx
23337 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23338 rtx destptr, rtx srcptr, rtx value,
23339 rtx vec_value, rtx count, int align,
23340 int desired_alignment, bool issetmem)
23342 int i;
23343 for (i = 1; i < desired_alignment; i <<= 1)
23345 if (align <= i)
23347 rtx label = ix86_expand_aligntest (destptr, i, false);
23348 if (issetmem)
23350 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23351 destmem = emit_memset (destmem, destptr, vec_value, i);
23352 else
23353 destmem = emit_memset (destmem, destptr, value, i);
23355 else
23356 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23357 ix86_adjust_counter (count, i);
23358 emit_label (label);
23359 LABEL_NUSES (label) = 1;
23360 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23363 return destmem;
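/* Worked example (illustrative): with ALIGN == 1 and DESIRED_ALIGNMENT == 16
   this emits, for i = 1, 2, 4 and 8, an alignment test on DESTPTR followed by
   a conditional copy (or store) of i bytes and a matching adjustment of COUNT,
   so at most 15 bytes are handled before the main loop sees an aligned
   destination.  */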
23366 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23367 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23368 and jump to DONE_LABEL. */
23369 static void
23370 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23371 rtx destptr, rtx srcptr,
23372 rtx value, rtx vec_value,
23373 rtx count, int size,
23374 rtx done_label, bool issetmem)
23376 rtx label = ix86_expand_aligntest (count, size, false);
23377 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23378 rtx modesize;
23379 int n;
23381 /* If we do not have vector value to copy, we must reduce size. */
23382 if (issetmem)
23384 if (!vec_value)
23386 if (GET_MODE (value) == VOIDmode && size > 8)
23387 mode = Pmode;
23388 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23389 mode = GET_MODE (value);
23391 else
23392 mode = GET_MODE (vec_value), value = vec_value;
23394 else
23396 /* Choose appropriate vector mode. */
23397 if (size >= 32)
23398 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23399 else if (size >= 16)
23400 mode = TARGET_SSE ? V16QImode : DImode;
23401 srcmem = change_address (srcmem, mode, srcptr);
23403 destmem = change_address (destmem, mode, destptr);
23404 modesize = GEN_INT (GET_MODE_SIZE (mode));
23405 gcc_assert (GET_MODE_SIZE (mode) <= size);
23406 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23408 if (issetmem)
23409 emit_move_insn (destmem, gen_lowpart (mode, value));
23410 else
23412 emit_move_insn (destmem, srcmem);
23413 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23415 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23418 destmem = offset_address (destmem, count, 1);
23419 destmem = offset_address (destmem, GEN_INT (-2 * size),
23420 GET_MODE_SIZE (mode));
23421 if (!issetmem)
23423 srcmem = offset_address (srcmem, count, 1);
23424 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23425 GET_MODE_SIZE (mode));
23427 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23429 if (issetmem)
23430 emit_move_insn (destmem, gen_lowpart (mode, value));
23431 else
23433 emit_move_insn (destmem, srcmem);
23434 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23436 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23438 emit_jump_insn (gen_jump (done_label));
23439 emit_barrier ();
23441 emit_label (label);
23442 LABEL_NUSES (label) = 1;
23445 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23446 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23447 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23448 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23449 DONE_LABEL is a label after the whole copying sequence. The label is created
23450 on demand if *DONE_LABEL is NULL.
23451 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for the new
23452 bounds after the initial copies.
23454 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23455 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23456 we will dispatch to a library call for large blocks.
23458 In pseudocode we do:
23460 if (COUNT < SIZE)
23462 Assume that SIZE is 4. Bigger sizes are handled analogously
23463 if (COUNT & 4)
23465 copy 4 bytes from SRCPTR to DESTPTR
23466 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23467 goto done_label
23469 if (!COUNT)
23470 goto done_label;
23471 copy 1 byte from SRCPTR to DESTPTR
23472 if (COUNT & 2)
23474 copy 2 bytes from SRCPTR to DESTPTR
23475 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23478 else
23480 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23481 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23483 OLD_DESTPTR = DESTPTR;
23484 Align DESTPTR up to DESIRED_ALIGN
23485 SRCPTR += DESTPTR - OLD_DESTPTR
23486 COUNT -= DEST_PTR - OLD_DESTPTR
23487 if (DYNAMIC_CHECK)
23488 Round COUNT down to multiple of SIZE
23489 << optional caller supplied zero size guard is here >>
23490 << optional caller supplied dynamic check is here >>
23491 << caller supplied main copy loop is here >>
23493 done_label:
23495 static void
23496 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23497 rtx *destptr, rtx *srcptr,
23498 enum machine_mode mode,
23499 rtx value, rtx vec_value,
23500 rtx *count,
23501 rtx *done_label,
23502 int size,
23503 int desired_align,
23504 int align,
23505 unsigned HOST_WIDE_INT *min_size,
23506 bool dynamic_check,
23507 bool issetmem)
23509 rtx loop_label = NULL, label;
23510 int n;
23511 rtx modesize;
23512 int prolog_size = 0;
23513 rtx mode_value;
23515 /* Choose the proper value to copy. */
23516 if (issetmem && VECTOR_MODE_P (mode))
23517 mode_value = vec_value;
23518 else
23519 mode_value = value;
23520 gcc_assert (GET_MODE_SIZE (mode) <= size);
23522 /* See if block is big or small, handle small blocks. */
23523 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23525 int size2 = size;
23526 loop_label = gen_label_rtx ();
23528 if (!*done_label)
23529 *done_label = gen_label_rtx ();
23531 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23532 1, loop_label);
23533 size2 >>= 1;
23535 /* Handle sizes > 3. */
23536 for (;size2 > 2; size2 >>= 1)
23537 expand_small_movmem_or_setmem (destmem, srcmem,
23538 *destptr, *srcptr,
23539 value, vec_value,
23540 *count,
23541 size2, *done_label, issetmem);
23542 /* Nothing to copy? Jump to DONE_LABEL if so */
23543 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23544 1, *done_label);
23546 /* Do a byte copy. */
23547 destmem = change_address (destmem, QImode, *destptr);
23548 if (issetmem)
23549 emit_move_insn (destmem, gen_lowpart (QImode, value));
23550 else
23552 srcmem = change_address (srcmem, QImode, *srcptr);
23553 emit_move_insn (destmem, srcmem);
23556 /* Handle sizes 2 and 3. */
23557 label = ix86_expand_aligntest (*count, 2, false);
23558 destmem = change_address (destmem, HImode, *destptr);
23559 destmem = offset_address (destmem, *count, 1);
23560 destmem = offset_address (destmem, GEN_INT (-2), 2);
23561 if (issetmem)
23562 emit_move_insn (destmem, gen_lowpart (HImode, value));
23563 else
23565 srcmem = change_address (srcmem, HImode, *srcptr);
23566 srcmem = offset_address (srcmem, *count, 1);
23567 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23568 emit_move_insn (destmem, srcmem);
23571 emit_label (label);
23572 LABEL_NUSES (label) = 1;
23573 emit_jump_insn (gen_jump (*done_label));
23574 emit_barrier ();
23576 else
23577 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23578 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23580 /* Start memcpy for COUNT >= SIZE. */
23581 if (loop_label)
23583 emit_label (loop_label);
23584 LABEL_NUSES (loop_label) = 1;
23587 /* Copy first desired_align bytes. */
23588 if (!issetmem)
23589 srcmem = change_address (srcmem, mode, *srcptr);
23590 destmem = change_address (destmem, mode, *destptr);
23591 modesize = GEN_INT (GET_MODE_SIZE (mode));
23592 for (n = 0; prolog_size < desired_align - align; n++)
23594 if (issetmem)
23595 emit_move_insn (destmem, mode_value);
23596 else
23598 emit_move_insn (destmem, srcmem);
23599 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23601 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23602 prolog_size += GET_MODE_SIZE (mode);
23606 /* Copy last SIZE bytes. */
23607 destmem = offset_address (destmem, *count, 1);
23608 destmem = offset_address (destmem,
23609 GEN_INT (-size - prolog_size),
23611 if (issetmem)
23612 emit_move_insn (destmem, mode_value);
23613 else
23615 srcmem = offset_address (srcmem, *count, 1);
23616 srcmem = offset_address (srcmem,
23617 GEN_INT (-size - prolog_size),
23619 emit_move_insn (destmem, srcmem);
23621 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23623 destmem = offset_address (destmem, modesize, 1);
23624 if (issetmem)
23625 emit_move_insn (destmem, mode_value);
23626 else
23628 srcmem = offset_address (srcmem, modesize, 1);
23629 emit_move_insn (destmem, srcmem);
23633 /* Align destination. */
23634 if (desired_align > 1 && desired_align > align)
23636 rtx saveddest = *destptr;
23638 gcc_assert (desired_align <= size);
23639 /* Align destptr up, place it to new register. */
23640 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23641 GEN_INT (prolog_size),
23642 NULL_RTX, 1, OPTAB_DIRECT);
23643 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23644 GEN_INT (-desired_align),
23645 *destptr, 1, OPTAB_DIRECT);
23646 /* See how many bytes we skipped. */
23647 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23648 *destptr,
23649 saveddest, 1, OPTAB_DIRECT);
23650 /* Adjust srcptr and count. */
23651 if (!issetmem)
23652 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23653 *srcptr, 1, OPTAB_DIRECT);
23654 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23655 saveddest, *count, 1, OPTAB_DIRECT);
23656 /* We copied at most size + prolog_size. */
23657 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23658 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23659 else
23660 *min_size = 0;
23662 /* Our loops always round down the block size, but for dispatch to the library
23663 we need the precise value. */
23664 if (dynamic_check)
23665 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23666 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23668 else
23670 gcc_assert (prolog_size == 0);
23671 /* Decrease count, so we won't end up copying last word twice. */
23672 if (!CONST_INT_P (*count))
23673 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23674 constm1_rtx, *count, 1, OPTAB_DIRECT);
23675 else
23676 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23677 if (*min_size)
23678 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23683 /* This function is like the previous one, except here we know how many bytes
23684 need to be copied. That allows us to update alignment not only of DST, which
23685 is returned, but also of SRC, which is passed as a pointer for that
23686 reason. */
23687 static rtx
23688 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23689 rtx srcreg, rtx value, rtx vec_value,
23690 int desired_align, int align_bytes,
23691 bool issetmem)
23693 rtx src = NULL;
23694 rtx orig_dst = dst;
23695 rtx orig_src = NULL;
23696 int piece_size = 1;
23697 int copied_bytes = 0;
23699 if (!issetmem)
23701 gcc_assert (srcp != NULL);
23702 src = *srcp;
23703 orig_src = src;
23706 for (piece_size = 1;
23707 piece_size <= desired_align && copied_bytes < align_bytes;
23708 piece_size <<= 1)
23710 if (align_bytes & piece_size)
23712 if (issetmem)
23714 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23715 dst = emit_memset (dst, destreg, vec_value, piece_size);
23716 else
23717 dst = emit_memset (dst, destreg, value, piece_size);
23719 else
23720 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23721 copied_bytes += piece_size;
23724 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23725 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23726 if (MEM_SIZE_KNOWN_P (orig_dst))
23727 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23729 if (!issetmem)
23731 int src_align_bytes = get_mem_align_offset (src, desired_align
23732 * BITS_PER_UNIT);
23733 if (src_align_bytes >= 0)
23734 src_align_bytes = desired_align - src_align_bytes;
23735 if (src_align_bytes >= 0)
23737 unsigned int src_align;
23738 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23740 if ((src_align_bytes & (src_align - 1))
23741 == (align_bytes & (src_align - 1)))
23742 break;
23744 if (src_align > (unsigned int) desired_align)
23745 src_align = desired_align;
23746 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23747 set_mem_align (src, src_align * BITS_PER_UNIT);
23749 if (MEM_SIZE_KNOWN_P (orig_src))
23750 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23751 *srcp = src;
23754 return dst;
23757 /* Return true if ALG can be used in current context.
23758 Assume we expand memset if MEMSET is true. */
23759 static bool
23760 alg_usable_p (enum stringop_alg alg, bool memset)
23762 if (alg == no_stringop)
23763 return false;
23764 if (alg == vector_loop)
23765 return TARGET_SSE || TARGET_AVX;
23766 /* Algorithms using the rep prefix want at least edi and ecx;
23767 additionally, memset wants eax and memcpy wants esi. Don't
23768 consider such algorithms if the user has appropriated those
23769 registers for their own purposes. */
23770 if (alg == rep_prefix_1_byte
23771 || alg == rep_prefix_4_byte
23772 || alg == rep_prefix_8_byte)
23773 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23774 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23775 return true;
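/* For instance (illustrative comment): vector_loop is usable only with SSE or
   AVX enabled, and compiling with something like -ffixed-ecx makes every
   rep-prefix algorithm unusable because CX is no longer available.  */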
23778 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23779 static enum stringop_alg
23780 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23781 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23782 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23784 const struct stringop_algs * algs;
23785 bool optimize_for_speed;
23786 int max = -1;
23787 const struct processor_costs *cost;
23788 int i;
23789 bool any_alg_usable_p = false;
23791 *noalign = false;
23792 *dynamic_check = -1;
23794 /* Even if the string operation call is cold, we still might spend a lot
23795 of time processing large blocks. */
23796 if (optimize_function_for_size_p (cfun)
23797 || (optimize_insn_for_size_p ()
23798 && (max_size < 256
23799 || (expected_size != -1 && expected_size < 256))))
23800 optimize_for_speed = false;
23801 else
23802 optimize_for_speed = true;
23804 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23805 if (memset)
23806 algs = &cost->memset[TARGET_64BIT != 0];
23807 else
23808 algs = &cost->memcpy[TARGET_64BIT != 0];
23810 /* See maximal size for user defined algorithm. */
23811 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23813 enum stringop_alg candidate = algs->size[i].alg;
23814 bool usable = alg_usable_p (candidate, memset);
23815 any_alg_usable_p |= usable;
23817 if (candidate != libcall && candidate && usable)
23818 max = algs->size[i].max;
23821 /* If the expected size is not known but the max size is small enough
23822 that the inline version is a win, set the expected size into
23823 the range. */
23824 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23825 && expected_size == -1)
23826 expected_size = min_size / 2 + max_size / 2;
23828 /* If the user specified the algorithm, honor it if possible. */
23829 if (ix86_stringop_alg != no_stringop
23830 && alg_usable_p (ix86_stringop_alg, memset))
23831 return ix86_stringop_alg;
23832 /* rep; movq or rep; movl is the smallest variant. */
23833 else if (!optimize_for_speed)
23835 *noalign = true;
23836 if (!count || (count & 3) || (memset && !zero_memset))
23837 return alg_usable_p (rep_prefix_1_byte, memset)
23838 ? rep_prefix_1_byte : loop_1_byte;
23839 else
23840 return alg_usable_p (rep_prefix_4_byte, memset)
23841 ? rep_prefix_4_byte : loop;
23843 /* Very tiny blocks are best handled via the loop; REP is expensive to
23844 set up. */
23845 else if (expected_size != -1 && expected_size < 4)
23846 return loop_1_byte;
23847 else if (expected_size != -1)
23849 enum stringop_alg alg = libcall;
23850 bool alg_noalign = false;
23851 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23853 /* We get here if the algorithms that were not libcall-based
23854 were rep-prefix based and we are unable to use rep prefixes
23855 based on global register usage. Break out of the loop and
23856 use the heuristic below. */
23857 if (algs->size[i].max == 0)
23858 break;
23859 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23861 enum stringop_alg candidate = algs->size[i].alg;
23863 if (candidate != libcall && alg_usable_p (candidate, memset))
23865 alg = candidate;
23866 alg_noalign = algs->size[i].noalign;
23868 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23869 last non-libcall inline algorithm. */
23870 if (TARGET_INLINE_ALL_STRINGOPS)
23872 /* When the current size is best to be copied by a libcall,
23873 but we are still forced to inline, run the heuristic below
23874 that will pick code for medium sized blocks. */
23875 if (alg != libcall)
23877 *noalign = alg_noalign;
23878 return alg;
23880 break;
23882 else if (alg_usable_p (candidate, memset))
23884 *noalign = algs->size[i].noalign;
23885 return candidate;
23890 /* When asked to inline the call anyway, try to pick a meaningful choice.
23891 We look for the maximal size of block that is faster to copy by hand and
23892 take blocks of at most that size, guessing that the average size will
23893 be roughly half of the maximum.
23895 If this turns out to be bad, we might simply specify the preferred
23896 choice in ix86_costs. */
23897 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23898 && (algs->unknown_size == libcall
23899 || !alg_usable_p (algs->unknown_size, memset)))
23901 enum stringop_alg alg;
23903 /* If there aren't any usable algorithms, then recursing on
23904 smaller sizes isn't going to find anything. Just return the
23905 simple byte-at-a-time copy loop. */
23906 if (!any_alg_usable_p)
23908 /* Pick something reasonable. */
23909 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23910 *dynamic_check = 128;
23911 return loop_1_byte;
23913 if (max == -1)
23914 max = 4096;
23915 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23916 zero_memset, dynamic_check, noalign);
23917 gcc_assert (*dynamic_check == -1);
23918 gcc_assert (alg != libcall);
23919 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23920 *dynamic_check = max;
23921 return alg;
23923 return (alg_usable_p (algs->unknown_size, memset)
23924 ? algs->unknown_size : libcall);
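/* Example of the decision flow (illustrative): when optimizing for size with a
   known count that is a multiple of 4 (and a zero value in the memset case),
   the rep_prefix_4_byte variant is chosen if usable; otherwise the per-byte
   rep prefix or the byte loop is used, and *NOALIGN is set so no alignment
   prologue is generated.  */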
23927 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23928 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23929 static int
23930 decide_alignment (int align,
23931 enum stringop_alg alg,
23932 int expected_size,
23933 enum machine_mode move_mode)
23935 int desired_align = 0;
23937 gcc_assert (alg != no_stringop);
23939 if (alg == libcall)
23940 return 0;
23941 if (move_mode == VOIDmode)
23942 return 0;
23944 desired_align = GET_MODE_SIZE (move_mode);
23945 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
23946 copying a whole cache line at once. */
23947 if (TARGET_PENTIUMPRO
23948 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23949 desired_align = 8;
23951 if (optimize_size)
23952 desired_align = 1;
23953 if (desired_align < align)
23954 desired_align = align;
23955 if (expected_size != -1 && expected_size < 4)
23956 desired_align = align;
23958 return desired_align;
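/* For example (illustrative): a DImode MOVE_MODE requests 8-byte alignment and
   a 16-byte vector mode requests 16, while optimizing for size or expecting
   tiny blocks (EXPECTED_SIZE < 4) drops the request back to the incoming
   ALIGN.  */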
23962 /* Helper function for memset. For the QImode value 0xXY produce
23963 0xXYXYXYXY of the width specified by MODE. This is essentially
23964 a multiplication by 0x01010101, but we can do slightly better than
23965 synth_mult by unwinding the sequence by hand on CPUs with
23966 slow multiply. */
23967 static rtx
23968 promote_duplicated_reg (enum machine_mode mode, rtx val)
23970 enum machine_mode valmode = GET_MODE (val);
23971 rtx tmp;
23972 int nops = mode == DImode ? 3 : 2;
23974 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23975 if (val == const0_rtx)
23976 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23977 if (CONST_INT_P (val))
23979 HOST_WIDE_INT v = INTVAL (val) & 255;
23981 v |= v << 8;
23982 v |= v << 16;
23983 if (mode == DImode)
23984 v |= (v << 16) << 16;
23985 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23988 if (valmode == VOIDmode)
23989 valmode = QImode;
23990 if (valmode != QImode)
23991 val = gen_lowpart (QImode, val);
23992 if (mode == QImode)
23993 return val;
23994 if (!TARGET_PARTIAL_REG_STALL)
23995 nops--;
23996 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
23997 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
23998 <= (ix86_cost->shift_const + ix86_cost->add) * nops
23999 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24001 rtx reg = convert_modes (mode, QImode, val, true);
24002 tmp = promote_duplicated_reg (mode, const1_rtx);
24003 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24004 OPTAB_DIRECT);
24006 else
24008 rtx reg = convert_modes (mode, QImode, val, true);
24010 if (!TARGET_PARTIAL_REG_STALL)
24011 if (mode == SImode)
24012 emit_insn (gen_movsi_insv_1 (reg, reg));
24013 else
24014 emit_insn (gen_movdi_insv_1 (reg, reg));
24015 else
24017 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24018 NULL, 1, OPTAB_DIRECT);
24019 reg =
24020 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24022 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24023 NULL, 1, OPTAB_DIRECT);
24024 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24025 if (mode == SImode)
24026 return reg;
24027 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24028 NULL, 1, OPTAB_DIRECT);
24029 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24030 return reg;
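/* Worked example (illustrative): promote_duplicated_reg (SImode, GEN_INT (0xab))
   takes the constant path and produces 0xabababab (v |= v << 8; v |= v << 16);
   for DImode the extra (v << 16) << 16 step yields 0xabababababababab.  For a
   non-constant byte the shift/or (or insv) sequence above builds the same
   replicated pattern at run time.  */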
24034 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24035 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24036 alignment from ALIGN to DESIRED_ALIGN. */
24037 static rtx
24038 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24039 int align)
24041 rtx promoted_val;
24043 if (TARGET_64BIT
24044 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24045 promoted_val = promote_duplicated_reg (DImode, val);
24046 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24047 promoted_val = promote_duplicated_reg (SImode, val);
24048 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24049 promoted_val = promote_duplicated_reg (HImode, val);
24050 else
24051 promoted_val = val;
24053 return promoted_val;
24056 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24057 operations when profitable. The code depends upon architecture, block size
24058 and alignment, but always has one of the following overall structures:
24060 Aligned move sequence:
24062 1) Prologue guard: Conditional that jumps up to epilogues for small
24063 blocks that can be handled by epilogue alone. This is faster
24064 but also needed for correctness, since the prologue assumes the block
24065 is larger than the desired alignment.
24067 Optional dynamic check for size and libcall for large
24068 blocks is emitted here too, with -minline-stringops-dynamically.
24070 2) Prologue: copy first few bytes in order to get destination
24071 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24072 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24073 copied. We emit either a jump tree on power of two sized
24074 blocks, or a byte loop.
24076 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24077 with specified algorithm.
24079 4) Epilogue: code copying tail of the block that is too small to be
24080 handled by main body (or up to size guarded by prologue guard).
24082 Misaligned move sequence
24084 1) misaligned move prologue/epilogue containing:
24085 a) Prologue handling small memory blocks and jumping to done_label
24086 (skipped if blocks are known to be large enough)
24087 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24088 needed, done by a single possibly misaligned move
24089 (skipped if alignment is not needed)
24090 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24092 2) Zero size guard dispatching to done_label, if needed
24094 3) dispatch to library call, if needed,
24096 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24097 with specified algorithm. */
24098 bool
24099 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24100 rtx align_exp, rtx expected_align_exp,
24101 rtx expected_size_exp, rtx min_size_exp,
24102 rtx max_size_exp, rtx probable_max_size_exp,
24103 bool issetmem)
24105 rtx destreg;
24106 rtx srcreg = NULL;
24107 rtx label = NULL;
24108 rtx tmp;
24109 rtx jump_around_label = NULL;
24110 HOST_WIDE_INT align = 1;
24111 unsigned HOST_WIDE_INT count = 0;
24112 HOST_WIDE_INT expected_size = -1;
24113 int size_needed = 0, epilogue_size_needed;
24114 int desired_align = 0, align_bytes = 0;
24115 enum stringop_alg alg;
24116 rtx promoted_val = NULL;
24117 rtx vec_promoted_val = NULL;
24118 bool force_loopy_epilogue = false;
24119 int dynamic_check;
24120 bool need_zero_guard = false;
24121 bool noalign;
24122 enum machine_mode move_mode = VOIDmode;
24123 int unroll_factor = 1;
24124 /* TODO: Once value ranges are available, fill in proper data. */
24125 unsigned HOST_WIDE_INT min_size = 0;
24126 unsigned HOST_WIDE_INT max_size = -1;
24127 unsigned HOST_WIDE_INT probable_max_size = -1;
24128 bool misaligned_prologue_used = false;
24130 if (CONST_INT_P (align_exp))
24131 align = INTVAL (align_exp);
24132 /* i386 can do misaligned access at a reasonably increased cost. */
24133 if (CONST_INT_P (expected_align_exp)
24134 && INTVAL (expected_align_exp) > align)
24135 align = INTVAL (expected_align_exp);
24136 /* ALIGN is the minimum of destination and source alignment, but we care here
24137 just about destination alignment. */
24138 else if (!issetmem
24139 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24140 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24142 if (CONST_INT_P (count_exp))
24143 min_size = max_size = probable_max_size = count = expected_size
24144 = INTVAL (count_exp);
24145 else
24147 if (min_size_exp)
24148 min_size = INTVAL (min_size_exp);
24149 if (max_size_exp)
24150 max_size = INTVAL (max_size_exp);
24151 if (probable_max_size_exp)
24152 probable_max_size = INTVAL (probable_max_size_exp);
24153 if (CONST_INT_P (expected_size_exp) && count == 0)
24154 expected_size = INTVAL (expected_size_exp);
24157 /* Make sure we don't need to care about overflow later on. */
24158 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24159 return false;
24161 /* Step 0: Decide on preferred algorithm, desired alignment and
24162 size of chunks to be copied by main loop. */
24163 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24164 issetmem,
24165 issetmem && val_exp == const0_rtx,
24166 &dynamic_check, &noalign);
24167 if (alg == libcall)
24168 return false;
24169 gcc_assert (alg != no_stringop);
24171 /* For now the vector version of memset is generated only for memory zeroing, as
24172 creating the promoted vector value is very cheap in this case. */
24173 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24174 alg = unrolled_loop;
24176 if (!count)
24177 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24178 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24179 if (!issetmem)
24180 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24182 unroll_factor = 1;
24183 move_mode = word_mode;
24184 switch (alg)
24186 case libcall:
24187 case no_stringop:
24188 case last_alg:
24189 gcc_unreachable ();
24190 case loop_1_byte:
24191 need_zero_guard = true;
24192 move_mode = QImode;
24193 break;
24194 case loop:
24195 need_zero_guard = true;
24196 break;
24197 case unrolled_loop:
24198 need_zero_guard = true;
24199 unroll_factor = (TARGET_64BIT ? 4 : 2);
24200 break;
24201 case vector_loop:
24202 need_zero_guard = true;
24203 unroll_factor = 4;
24204 /* Find the widest supported mode. */
24205 move_mode = word_mode;
24206 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24207 != CODE_FOR_nothing)
24208 move_mode = GET_MODE_WIDER_MODE (move_mode);
24210 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24211 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24212 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24214 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24215 move_mode = mode_for_vector (word_mode, nunits);
24216 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24217 move_mode = word_mode;
24219 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24220 break;
24221 case rep_prefix_8_byte:
24222 move_mode = DImode;
24223 break;
24224 case rep_prefix_4_byte:
24225 move_mode = SImode;
24226 break;
24227 case rep_prefix_1_byte:
24228 move_mode = QImode;
24229 break;
24231 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24232 epilogue_size_needed = size_needed;
24234 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24235 if (!TARGET_ALIGN_STRINGOPS || noalign)
24236 align = desired_align;
24238 /* Step 1: Prologue guard. */
24240 /* Alignment code needs count to be in register. */
24241 if (CONST_INT_P (count_exp) && desired_align > align)
24243 if (INTVAL (count_exp) > desired_align
24244 && INTVAL (count_exp) > size_needed)
24246 align_bytes
24247 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24248 if (align_bytes <= 0)
24249 align_bytes = 0;
24250 else
24251 align_bytes = desired_align - align_bytes;
24253 if (align_bytes == 0)
24254 count_exp = force_reg (counter_mode (count_exp), count_exp);
24256 gcc_assert (desired_align >= 1 && align >= 1);
24258 /* Misaligned move sequences handle both prologue and epilogue at once.
24259 Default code generation results in smaller code for large alignments
24260 and also avoids redundant work when sizes are known precisely. */
24261 misaligned_prologue_used
24262 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24263 && MAX (desired_align, epilogue_size_needed) <= 32
24264 && desired_align <= epilogue_size_needed
24265 && ((desired_align > align && !align_bytes)
24266 || (!count && epilogue_size_needed > 1)));
24268 /* Do the cheap promotion to allow better CSE across the
24269 main loop and epilogue (i.e. one load of the big constant in
24270 front of all the code).
24271 For now the misaligned move sequences do not have a fast path
24272 without broadcasting. */
24273 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24275 if (alg == vector_loop)
24277 gcc_assert (val_exp == const0_rtx);
24278 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24279 promoted_val = promote_duplicated_reg_to_size (val_exp,
24280 GET_MODE_SIZE (word_mode),
24281 desired_align, align);
24283 else
24285 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24286 desired_align, align);
24289 /* Misaligned move sequences handle both prologues and epilogues at once.
24290 Default code generation results in smaller code for large alignments and
24291 also avoids redundant work when sizes are known precisely. */
24292 if (misaligned_prologue_used)
24294 /* Misaligned move prologue handled small blocks by itself. */
24295 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24296 (dst, src, &destreg, &srcreg,
24297 move_mode, promoted_val, vec_promoted_val,
24298 &count_exp,
24299 &jump_around_label,
24300 desired_align < align
24301 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24302 desired_align, align, &min_size, dynamic_check, issetmem);
24303 if (!issetmem)
24304 src = change_address (src, BLKmode, srcreg);
24305 dst = change_address (dst, BLKmode, destreg);
24306 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24307 epilogue_size_needed = 0;
24308 if (need_zero_guard && !min_size)
24310 /* It is possible that we copied enough so the main loop will not
24311 execute. */
24312 gcc_assert (size_needed > 1);
24313 if (jump_around_label == NULL_RTX)
24314 jump_around_label = gen_label_rtx ();
24315 emit_cmp_and_jump_insns (count_exp,
24316 GEN_INT (size_needed),
24317 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24318 if (expected_size == -1
24319 || expected_size < (desired_align - align) / 2 + size_needed)
24320 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24321 else
24322 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24325 /* Ensure that alignment prologue won't copy past end of block. */
24326 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24328 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24329 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24330 Make sure it is power of 2. */
24331 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24333 /* To improve performance of small blocks, we jump around the VAL
24334 promoting code. This means that if the promoted VAL is not constant,
24335 we might not use it in the epilogue and have to use the byte
24336 loop variant. */
24337 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24338 force_loopy_epilogue = true;
24339 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24340 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24342 /* If main algorithm works on QImode, no epilogue is needed.
24343 For small sizes just don't align anything. */
24344 if (size_needed == 1)
24345 desired_align = align;
24346 else
24347 goto epilogue;
24349 else if (!count
24350 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24352 label = gen_label_rtx ();
24353 emit_cmp_and_jump_insns (count_exp,
24354 GEN_INT (epilogue_size_needed),
24355 LTU, 0, counter_mode (count_exp), 1, label);
24356 if (expected_size == -1 || expected_size < epilogue_size_needed)
24357 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24358 else
24359 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24363 /* Emit code to decide at runtime whether a library call or the inline code
24364 should be used. */
24365 if (dynamic_check != -1)
24367 if (!issetmem && CONST_INT_P (count_exp))
24369 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24371 emit_block_move_via_libcall (dst, src, count_exp, false);
24372 count_exp = const0_rtx;
24373 goto epilogue;
24376 else
24378 rtx hot_label = gen_label_rtx ();
24379 if (jump_around_label == NULL_RTX)
24380 jump_around_label = gen_label_rtx ();
24381 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24382 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24383 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24384 if (issetmem)
24385 set_storage_via_libcall (dst, count_exp, val_exp, false);
24386 else
24387 emit_block_move_via_libcall (dst, src, count_exp, false);
24388 emit_jump (jump_around_label);
24389 emit_label (hot_label);
24393 /* Step 2: Alignment prologue. */
24394 /* Do the expensive promotion once we branched off the small blocks. */
24395 if (issetmem && !promoted_val)
24396 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24397 desired_align, align);
24399 if (desired_align > align && !misaligned_prologue_used)
24401 if (align_bytes == 0)
24403 /* Except for the first move in the prologue, we no longer know
24404 the constant offset in aliasing info. It doesn't seem worth
24405 the pain to maintain it for the first move, so throw away
24406 the info early. */
24407 dst = change_address (dst, BLKmode, destreg);
24408 if (!issetmem)
24409 src = change_address (src, BLKmode, srcreg);
24410 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24411 promoted_val, vec_promoted_val,
24412 count_exp, align, desired_align,
24413 issetmem);
24414 /* At most desired_align - align bytes are copied. */
24415 if (min_size < (unsigned)(desired_align - align))
24416 min_size = 0;
24417 else
24418 min_size -= desired_align - align;
24420 else
24422 /* If we know how many bytes need to be stored before dst is
24423 sufficiently aligned, maintain aliasing info accurately. */
24424 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24425 srcreg,
24426 promoted_val,
24427 vec_promoted_val,
24428 desired_align,
24429 align_bytes,
24430 issetmem);
24432 count_exp = plus_constant (counter_mode (count_exp),
24433 count_exp, -align_bytes);
24434 count -= align_bytes;
24435 min_size -= align_bytes;
24436 max_size -= align_bytes;
24438 if (need_zero_guard
24439 && !min_size
24440 && (count < (unsigned HOST_WIDE_INT) size_needed
24441 || (align_bytes == 0
24442 && count < ((unsigned HOST_WIDE_INT) size_needed
24443 + desired_align - align))))
24445 /* It is possible that we copied enough so the main loop will not
24446 execute. */
24447 gcc_assert (size_needed > 1);
24448 if (label == NULL_RTX)
24449 label = gen_label_rtx ();
24450 emit_cmp_and_jump_insns (count_exp,
24451 GEN_INT (size_needed),
24452 LTU, 0, counter_mode (count_exp), 1, label);
24453 if (expected_size == -1
24454 || expected_size < (desired_align - align) / 2 + size_needed)
24455 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24456 else
24457 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24460 if (label && size_needed == 1)
24462 emit_label (label);
24463 LABEL_NUSES (label) = 1;
24464 label = NULL;
24465 epilogue_size_needed = 1;
24466 if (issetmem)
24467 promoted_val = val_exp;
24469 else if (label == NULL_RTX && !misaligned_prologue_used)
24470 epilogue_size_needed = size_needed;
24472 /* Step 3: Main loop. */
24474 switch (alg)
24476 case libcall:
24477 case no_stringop:
24478 case last_alg:
24479 gcc_unreachable ();
24480 case loop_1_byte:
24481 case loop:
24482 case unrolled_loop:
24483 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24484 count_exp, move_mode, unroll_factor,
24485 expected_size, issetmem);
24486 break;
24487 case vector_loop:
24488 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24489 vec_promoted_val, count_exp, move_mode,
24490 unroll_factor, expected_size, issetmem);
24491 break;
24492 case rep_prefix_8_byte:
24493 case rep_prefix_4_byte:
24494 case rep_prefix_1_byte:
24495 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24496 val_exp, count_exp, move_mode, issetmem);
24497 break;
24499 /* Properly adjust the offsets of the src and dest memory for aliasing. */
24500 if (CONST_INT_P (count_exp))
24502 if (!issetmem)
24503 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24504 (count / size_needed) * size_needed);
24505 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24506 (count / size_needed) * size_needed);
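/* Editorial worked example for the adjustment above: with a constant count
   of 100 and size_needed of 16, the main loop covers (100 / 16) * 16 = 96
   bytes, so the epilogue memory references start at offset 96 from the
   original src/dst addresses.  */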
24508 else
24510 if (!issetmem)
24511 src = change_address (src, BLKmode, srcreg);
24512 dst = change_address (dst, BLKmode, destreg);
24515 /* Step 4: Epilogue to copy the remaining bytes. */
24516 epilogue:
24517 if (label)
24519 /* When the main loop is done, COUNT_EXP might hold the original count,
24520 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24521 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24522 bytes. Compensate if needed. */
24524 if (size_needed < epilogue_size_needed)
24526 tmp =
24527 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24528 GEN_INT (size_needed - 1), count_exp, 1,
24529 OPTAB_DIRECT);
24530 if (tmp != count_exp)
24531 emit_move_insn (count_exp, tmp);
24533 emit_label (label);
24534 LABEL_NUSES (label) = 1;
24537 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24539 if (force_loopy_epilogue)
24540 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24541 epilogue_size_needed);
24542 else
24544 if (issetmem)
24545 expand_setmem_epilogue (dst, destreg, promoted_val,
24546 vec_promoted_val, count_exp,
24547 epilogue_size_needed);
24548 else
24549 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24550 epilogue_size_needed);
24553 if (jump_around_label)
24554 emit_label (jump_around_label);
24555 return true;
24559 /* Expand the appropriate insns for doing strlen if not just doing
24560 repnz; scasb
24562 out = result, initialized with the start address
24563 align_rtx = alignment of the address.
24564 scratch = scratch register, initialized with the start address when
24565 not aligned, otherwise undefined
24567 This is just the body. It needs the initializations mentioned above and
24568 some address computation at the end. These things are done in i386.md. */
24570 static void
24571 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24573 int align;
24574 rtx tmp;
24575 rtx align_2_label = NULL_RTX;
24576 rtx align_3_label = NULL_RTX;
24577 rtx align_4_label = gen_label_rtx ();
24578 rtx end_0_label = gen_label_rtx ();
24579 rtx mem;
24580 rtx tmpreg = gen_reg_rtx (SImode);
24581 rtx scratch = gen_reg_rtx (SImode);
24582 rtx cmp;
24584 align = 0;
24585 if (CONST_INT_P (align_rtx))
24586 align = INTVAL (align_rtx);
24588 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24590 /* Is there a known alignment and is it less than 4? */
24591 if (align < 4)
24593 rtx scratch1 = gen_reg_rtx (Pmode);
24594 emit_move_insn (scratch1, out);
24595 /* Is there a known alignment and is it not 2? */
24596 if (align != 2)
24598 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24599 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24601 /* Leave just the 3 lower bits. */
24602 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24603 NULL_RTX, 0, OPTAB_WIDEN);
24605 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24606 Pmode, 1, align_4_label);
24607 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24608 Pmode, 1, align_2_label);
24609 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24610 Pmode, 1, align_3_label);
24612 else
24614 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24615 check whether it is aligned to a 4-byte boundary. */
24617 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24618 NULL_RTX, 0, OPTAB_WIDEN);
24620 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24621 Pmode, 1, align_4_label);
24624 mem = change_address (src, QImode, out);
24626 /* Now compare the bytes. */
24628 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24629 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24630 QImode, 1, end_0_label);
24632 /* Increment the address. */
24633 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24635 /* Not needed with an alignment of 2 */
24636 if (align != 2)
24638 emit_label (align_2_label);
24640 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24641 end_0_label);
24643 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24645 emit_label (align_3_label);
24648 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24649 end_0_label);
24651 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24654 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24655 align this loop; it only makes the program larger and does not help
24656 to speed it up. */
24657 emit_label (align_4_label);
24659 mem = change_address (src, SImode, out);
24660 emit_move_insn (scratch, mem);
24661 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24663 /* This formula yields a nonzero result iff one of the bytes is zero.
24664 This saves three branches inside the loop and many cycles. */
24666 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24667 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24668 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24669 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24670 gen_int_mode (0x80808080, SImode)));
24671 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24672 align_4_label);
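/* Editorial worked example of the test above: on a 32-bit value V the emitted
   sequence computes (V - 0x01010101) & ~V & 0x80808080, which is nonzero iff
   some byte of V is zero.  E.g. V = 0x41410041 yields 0x00008000 (the high
   bit of the zero byte), while V = 0x41424344 yields 0.  */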
24674 if (TARGET_CMOVE)
24676 rtx reg = gen_reg_rtx (SImode);
24677 rtx reg2 = gen_reg_rtx (Pmode);
24678 emit_move_insn (reg, tmpreg);
24679 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24681 /* If zero is not in the first two bytes, move two bytes forward. */
24682 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24683 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24684 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24685 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24686 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24687 reg,
24688 tmpreg)));
24689 /* Emit lea manually to avoid clobbering of flags. */
24690 emit_insn (gen_rtx_SET (SImode, reg2,
24691 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24693 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24694 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24695 emit_insn (gen_rtx_SET (VOIDmode, out,
24696 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24697 reg2,
24698 out)));
24700 else
24702 rtx end_2_label = gen_label_rtx ();
24703 /* Is zero in the first two bytes? */
24705 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24706 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24707 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24708 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24709 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24710 pc_rtx);
24711 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24712 JUMP_LABEL (tmp) = end_2_label;
24714 /* Not in the first two. Move two bytes forward. */
24715 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24716 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24718 emit_label (end_2_label);
24722 /* Avoid branch in fixing the byte. */
24723 tmpreg = gen_lowpart (QImode, tmpreg);
24724 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24725 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24726 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24727 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24729 emit_label (end_0_label);
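/* Editorial sketch, not part of this file: a plain-C approximation of the
   code sequence ix86_expand_strlensi_unroll_1 emits -- scan bytes until the
   pointer is 4-byte aligned, then scan a word at a time using the zero-byte
   test above, and finally locate the exact terminator.  The function name is
   hypothetical and a little-endian target with 32-bit int is assumed.  */
static const char *
sketch_find_nul (const char *p)
{
  /* Prologue: at most 3 byte checks to reach 4-byte alignment.  */
  while (((unsigned long) p & 3) != 0)
    {
      if (*p == 0)
        return p;
      p++;
    }
  /* Main loop: test 4 bytes at a time for a zero byte.  */
  for (;;)
    {
      unsigned int v;
      __builtin_memcpy (&v, p, sizeof v);   /* aligned 4-byte load */
      if ((v - 0x01010101u) & ~v & 0x80808080u)
        break;                              /* some byte of v is zero */
      p += 4;
    }
  /* Epilogue: step to the exact zero byte within the final word.  */
  while (*p != 0)
    p++;
  return p;
}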
24732 /* Expand strlen. */
24734 bool
24735 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24737 rtx addr, scratch1, scratch2, scratch3, scratch4;
24739 /* The generic case of the strlen expander is long. Avoid expanding
24740 it unless TARGET_INLINE_ALL_STRINGOPS. */
24742 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24743 && !TARGET_INLINE_ALL_STRINGOPS
24744 && !optimize_insn_for_size_p ()
24745 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24746 return false;
24748 addr = force_reg (Pmode, XEXP (src, 0));
24749 scratch1 = gen_reg_rtx (Pmode);
24751 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24752 && !optimize_insn_for_size_p ())
24754 /* It seems that some optimizers do not combine a call like
24755 foo(strlen(bar), strlen(bar));
24756 when the move and the subtraction are done here. The length is
24757 calculated just once when these instructions are emitted inside
24758 output_strlen_unroll(). But since &bar[strlen(bar)] is often
24759 used and this uses one fewer register for the lifetime of
24760 output_strlen_unroll(), this is better. */
24762 emit_move_insn (out, addr);
24764 ix86_expand_strlensi_unroll_1 (out, src, align);
24766 /* strlensi_unroll_1 returns the address of the zero at the end of
24767 the string, like memchr(), so compute the length by subtracting
24768 the start address. */
24769 emit_insn (ix86_gen_sub3 (out, out, addr));
24771 else
24773 rtx unspec;
24775 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24776 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24777 return false;
24779 scratch2 = gen_reg_rtx (Pmode);
24780 scratch3 = gen_reg_rtx (Pmode);
24781 scratch4 = force_reg (Pmode, constm1_rtx);
24783 emit_move_insn (scratch3, addr);
24784 eoschar = force_reg (QImode, eoschar);
24786 src = replace_equiv_address_nv (src, scratch3);
24788 /* If .md starts supporting :P, this can be done in .md. */
24789 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24790 scratch4), UNSPEC_SCAS);
24791 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24792 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24793 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
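/* Editorial check of the arithmetic just above: the counter register starts
   at -1 and "repnz scasb" decrements it once per byte scanned, including the
   terminating byte, so the final counter is -(len + 2).  The emitted
   one's-complement followed by an add of -1 recovers len = ~counter - 1;
   e.g. for a 5-byte string the counter ends at -7 and ~(-7) - 1 = 5.  */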
24795 return true;
24798 /* For a given symbol (function), construct code to compute the address of its
24799 PLT entry in the large x86-64 PIC model. */
24800 static rtx
24801 construct_plt_address (rtx symbol)
24803 rtx tmp, unspec;
24805 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24806 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24807 gcc_assert (Pmode == DImode);
24809 tmp = gen_reg_rtx (Pmode);
24810 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24812 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24813 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24814 return tmp;
24818 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24819 rtx callarg2,
24820 rtx pop, bool sibcall)
24822 unsigned int const cregs_size
24823 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24824 rtx vec[3 + cregs_size];
24825 rtx use = NULL, call;
24826 unsigned int vec_len = 0;
24828 if (pop == const0_rtx)
24829 pop = NULL;
24830 gcc_assert (!TARGET_64BIT || !pop);
24832 if (TARGET_MACHO && !TARGET_64BIT)
24834 #if TARGET_MACHO
24835 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24836 fnaddr = machopic_indirect_call_target (fnaddr);
24837 #endif
24839 else
24841 /* Static functions and indirect calls don't need the pic register. */
24842 if (flag_pic
24843 && (!TARGET_64BIT
24844 || (ix86_cmodel == CM_LARGE_PIC
24845 && DEFAULT_ABI != MS_ABI))
24846 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24847 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24848 use_reg (&use, pic_offset_table_rtx);
24851 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24853 rtx al = gen_rtx_REG (QImode, AX_REG);
24854 emit_move_insn (al, callarg2);
24855 use_reg (&use, al);
24858 if (ix86_cmodel == CM_LARGE_PIC
24859 && !TARGET_PECOFF
24860 && MEM_P (fnaddr)
24861 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24862 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24863 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24864 else if (sibcall
24865 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24866 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24868 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24869 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24872 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24873 if (retval)
24874 call = gen_rtx_SET (VOIDmode, retval, call);
24875 vec[vec_len++] = call;
24877 if (pop)
24879 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24880 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24881 vec[vec_len++] = pop;
24884 if (TARGET_64BIT_MS_ABI
24885 && (!callarg2 || INTVAL (callarg2) != -2))
24887 unsigned i;
24889 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24890 UNSPEC_MS_TO_SYSV_CALL);
24892 for (i = 0; i < cregs_size; i++)
24894 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24895 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24897 vec[vec_len++]
24898 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24902 if (vec_len > 1)
24903 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24904 call = emit_call_insn (call);
24905 if (use)
24906 CALL_INSN_FUNCTION_USAGE (call) = use;
24908 return call;
24911 /* Output the assembly for a call instruction. */
24913 const char *
24914 ix86_output_call_insn (rtx insn, rtx call_op)
24916 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24917 bool seh_nop_p = false;
24918 const char *xasm;
24920 if (SIBLING_CALL_P (insn))
24922 if (direct_p)
24923 xasm = "jmp\t%P0";
24924 /* SEH epilogue detection requires the indirect branch case
24925 to include REX.W. */
24926 else if (TARGET_SEH)
24927 xasm = "rex.W jmp %A0";
24928 else
24929 xasm = "jmp\t%A0";
24931 output_asm_insn (xasm, &call_op);
24932 return "";
24935 /* SEH unwinding can require an extra nop to be emitted in several
24936 circumstances. Determine if we have one of those. */
24937 if (TARGET_SEH)
24939 rtx i;
24941 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24943 /* If we get to another real insn, we don't need the nop. */
24944 if (INSN_P (i))
24945 break;
24947 /* If we get to the epilogue note, prevent a catch region from
24948 being adjacent to the standard epilogue sequence. If non-call
24949 exceptions are enabled, we'll have done this during epilogue emission. */
24950 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24951 && !flag_non_call_exceptions
24952 && !can_throw_internal (insn))
24954 seh_nop_p = true;
24955 break;
24959 /* If we didn't find a real insn following the call, prevent the
24960 unwinder from looking into the next function. */
24961 if (i == NULL)
24962 seh_nop_p = true;
24965 if (direct_p)
24966 xasm = "call\t%P0";
24967 else
24968 xasm = "call\t%A0";
24970 output_asm_insn (xasm, &call_op);
24972 if (seh_nop_p)
24973 return "nop";
24975 return "";
24978 /* Clear stack slot assignments remembered from previous functions.
24979 This is called from INIT_EXPANDERS once before RTL is emitted for each
24980 function. */
24982 static struct machine_function *
24983 ix86_init_machine_status (void)
24985 struct machine_function *f;
24987 f = ggc_alloc_cleared_machine_function ();
24988 f->use_fast_prologue_epilogue_nregs = -1;
24989 f->call_abi = ix86_abi;
24991 return f;
24994 /* Return a MEM corresponding to a stack slot with mode MODE.
24995 Allocate a new slot if necessary.
24997 The RTL for a function can have several slots available: N is
24998 which slot to use. */
25001 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25003 struct stack_local_entry *s;
25005 gcc_assert (n < MAX_386_STACK_LOCALS);
25007 for (s = ix86_stack_locals; s; s = s->next)
25008 if (s->mode == mode && s->n == n)
25009 return validize_mem (copy_rtx (s->rtl));
25011 s = ggc_alloc_stack_local_entry ();
25012 s->n = n;
25013 s->mode = mode;
25014 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25016 s->next = ix86_stack_locals;
25017 ix86_stack_locals = s;
25018 return validize_mem (s->rtl);
25021 static void
25022 ix86_instantiate_decls (void)
25024 struct stack_local_entry *s;
25026 for (s = ix86_stack_locals; s; s = s->next)
25027 if (s->rtl != NULL_RTX)
25028 instantiate_decl_rtl (s->rtl);
25031 /* Check whether x86 address PARTS is a pc-relative address. */
25033 static bool
25034 rip_relative_addr_p (struct ix86_address *parts)
25036 rtx base, index, disp;
25038 base = parts->base;
25039 index = parts->index;
25040 disp = parts->disp;
25042 if (disp && !base && !index)
25044 if (TARGET_64BIT)
25046 rtx symbol = disp;
25048 if (GET_CODE (disp) == CONST)
25049 symbol = XEXP (disp, 0);
25050 if (GET_CODE (symbol) == PLUS
25051 && CONST_INT_P (XEXP (symbol, 1)))
25052 symbol = XEXP (symbol, 0);
25054 if (GET_CODE (symbol) == LABEL_REF
25055 || (GET_CODE (symbol) == SYMBOL_REF
25056 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25057 || (GET_CODE (symbol) == UNSPEC
25058 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25059 || XINT (symbol, 1) == UNSPEC_PCREL
25060 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25061 return true;
25064 return false;
25067 /* Calculate the length of the memory address in the instruction encoding.
25068 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25069 or other prefixes. We never generate an addr32 prefix for an LEA insn. */
25072 memory_address_length (rtx addr, bool lea)
25074 struct ix86_address parts;
25075 rtx base, index, disp;
25076 int len;
25077 int ok;
25079 if (GET_CODE (addr) == PRE_DEC
25080 || GET_CODE (addr) == POST_INC
25081 || GET_CODE (addr) == PRE_MODIFY
25082 || GET_CODE (addr) == POST_MODIFY)
25083 return 0;
25085 ok = ix86_decompose_address (addr, &parts);
25086 gcc_assert (ok);
25088 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25090 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25091 if (TARGET_64BIT && !lea
25092 && (SImode_address_operand (addr, VOIDmode)
25093 || (parts.base && GET_MODE (parts.base) == SImode)
25094 || (parts.index && GET_MODE (parts.index) == SImode)))
25095 len++;
25097 base = parts.base;
25098 index = parts.index;
25099 disp = parts.disp;
25101 if (base && GET_CODE (base) == SUBREG)
25102 base = SUBREG_REG (base);
25103 if (index && GET_CODE (index) == SUBREG)
25104 index = SUBREG_REG (index);
25106 gcc_assert (base == NULL_RTX || REG_P (base));
25107 gcc_assert (index == NULL_RTX || REG_P (index));
25109 /* Rule of thumb:
25110 - esp as the base always wants an index,
25111 - ebp as the base always wants a displacement,
25112 - r12 as the base always wants an index,
25113 - r13 as the base always wants a displacement. */
25115 /* Register Indirect. */
25116 if (base && !index && !disp)
25118 /* esp (for its index) and ebp (for its displacement) need
25119 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25120 code. */
25121 if (base == arg_pointer_rtx
25122 || base == frame_pointer_rtx
25123 || REGNO (base) == SP_REG
25124 || REGNO (base) == BP_REG
25125 || REGNO (base) == R12_REG
25126 || REGNO (base) == R13_REG)
25127 len++;
25130 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25131 is not disp32, but disp32(%rip), so for disp32
25132 SIB byte is needed, unless print_operand_address
25133 optimizes it into disp32(%rip) or (%rip) is implied
25134 by UNSPEC. */
25135 else if (disp && !base && !index)
25137 len += 4;
25138 if (rip_relative_addr_p (&parts))
25139 len++;
25141 else
25143 /* Find the length of the displacement constant. */
25144 if (disp)
25146 if (base && satisfies_constraint_K (disp))
25147 len += 1;
25148 else
25149 len += 4;
25151 /* ebp always wants a displacement. Similarly r13. */
25152 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25153 len++;
25155 /* An index requires the two-byte modrm form.... */
25156 if (index
25157 /* ...like esp (or r12), which always wants an index. */
25158 || base == arg_pointer_rtx
25159 || base == frame_pointer_rtx
25160 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25161 len++;
25164 return len;
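/* Editorial examples of the value returned above (displacement bytes plus any
   extra modrm/SIB byte; the one-byte modrm and the opcode are not counted):
     (%eax)            -> 0
     (%esp)            -> 1  (SIB byte)
     (%ebp)            -> 1  (mandatory disp8)
     16(%ebx)          -> 1  (disp8)
     0x12345678(%ebx)  -> 4  (disp32)
     (%eax,%ecx,4)     -> 1  (SIB byte)  */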
25167 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25168 is set, expect that the insn has an 8-bit immediate alternative. */
25170 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25172 int len = 0;
25173 int i;
25174 extract_insn_cached (insn);
25175 for (i = recog_data.n_operands - 1; i >= 0; --i)
25176 if (CONSTANT_P (recog_data.operand[i]))
25178 enum attr_mode mode = get_attr_mode (insn);
25180 gcc_assert (!len);
25181 if (shortform && CONST_INT_P (recog_data.operand[i]))
25183 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25184 switch (mode)
25186 case MODE_QI:
25187 len = 1;
25188 continue;
25189 case MODE_HI:
25190 ival = trunc_int_for_mode (ival, HImode);
25191 break;
25192 case MODE_SI:
25193 ival = trunc_int_for_mode (ival, SImode);
25194 break;
25195 default:
25196 break;
25198 if (IN_RANGE (ival, -128, 127))
25200 len = 1;
25201 continue;
25204 switch (mode)
25206 case MODE_QI:
25207 len = 1;
25208 break;
25209 case MODE_HI:
25210 len = 2;
25211 break;
25212 case MODE_SI:
25213 len = 4;
25214 break;
25215 /* Immediates for DImode instructions are encoded
25216 as 32-bit sign-extended values. */
25217 case MODE_DI:
25218 len = 4;
25219 break;
25220 default:
25221 fatal_insn ("unknown insn mode", insn);
25224 return len;
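/* Editorial sketch, not part of this file: the SImode case of the rule above
   for an insn that has an 8-bit immediate alternative (SHORTFORM) -- the
   immediate costs 1 byte when it fits in [-128, 127], else 4.  The function
   name is hypothetical.  */
static int
sketch_simode_imm_length (HOST_WIDE_INT ival, bool shortform)
{
  if (shortform && IN_RANGE (ival, -128, 127))
    return 1;   /* e.g. addl $100, %eax */
  return 4;     /* e.g. addl $1000, %eax */
}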
25227 /* Compute default value for "length_address" attribute. */
25229 ix86_attr_length_address_default (rtx insn)
25231 int i;
25233 if (get_attr_type (insn) == TYPE_LEA)
25235 rtx set = PATTERN (insn), addr;
25237 if (GET_CODE (set) == PARALLEL)
25238 set = XVECEXP (set, 0, 0);
25240 gcc_assert (GET_CODE (set) == SET);
25242 addr = SET_SRC (set);
25244 return memory_address_length (addr, true);
25247 extract_insn_cached (insn);
25248 for (i = recog_data.n_operands - 1; i >= 0; --i)
25249 if (MEM_P (recog_data.operand[i]))
25251 constrain_operands_cached (reload_completed);
25252 if (which_alternative != -1)
25254 const char *constraints = recog_data.constraints[i];
25255 int alt = which_alternative;
25257 while (*constraints == '=' || *constraints == '+')
25258 constraints++;
25259 while (alt-- > 0)
25260 while (*constraints++ != ',')
25262 /* Skip ignored operands. */
25263 if (*constraints == 'X')
25264 continue;
25266 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25268 return 0;
25271 /* Compute default value for "length_vex" attribute. It includes
25272 2 or 3 byte VEX prefix and 1 opcode byte. */
25275 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25277 int i;
25279 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
25280 bit requires the 3-byte VEX prefix. */
25281 if (!has_0f_opcode || has_vex_w)
25282 return 3 + 1;
25284 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25285 if (!TARGET_64BIT)
25286 return 2 + 1;
25288 extract_insn_cached (insn);
25290 for (i = recog_data.n_operands - 1; i >= 0; --i)
25291 if (REG_P (recog_data.operand[i]))
25293 /* REX.W bit uses 3 byte VEX prefix. */
25294 if (GET_MODE (recog_data.operand[i]) == DImode
25295 && GENERAL_REG_P (recog_data.operand[i]))
25296 return 3 + 1;
25298 else
25300 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25301 if (MEM_P (recog_data.operand[i])
25302 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25303 return 3 + 1;
25306 return 2 + 1;
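/* Editorial examples of the rule above: "vaddps %xmm1, %xmm2, %xmm3" can use
   the 2-byte VEX prefix (counted here as 2 + 1 = 3 bytes), while
   "vaddps (%r8), %xmm2, %xmm3" needs REX.B for the extended base register and
   hence the 3-byte prefix (3 + 1 = 4).  */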
25309 /* Return the maximum number of instructions a cpu can issue. */
25311 static int
25312 ix86_issue_rate (void)
25314 switch (ix86_tune)
25316 case PROCESSOR_PENTIUM:
25317 case PROCESSOR_BONNELL:
25318 case PROCESSOR_SILVERMONT:
25319 case PROCESSOR_INTEL:
25320 case PROCESSOR_K6:
25321 case PROCESSOR_BTVER2:
25322 case PROCESSOR_PENTIUM4:
25323 case PROCESSOR_NOCONA:
25324 return 2;
25326 case PROCESSOR_PENTIUMPRO:
25327 case PROCESSOR_ATHLON:
25328 case PROCESSOR_K8:
25329 case PROCESSOR_AMDFAM10:
25330 case PROCESSOR_GENERIC:
25331 case PROCESSOR_BTVER1:
25332 return 3;
25334 case PROCESSOR_BDVER1:
25335 case PROCESSOR_BDVER2:
25336 case PROCESSOR_BDVER3:
25337 case PROCESSOR_BDVER4:
25338 case PROCESSOR_CORE2:
25339 case PROCESSOR_NEHALEM:
25340 case PROCESSOR_SANDYBRIDGE:
25341 case PROCESSOR_HASWELL:
25342 return 4;
25344 default:
25345 return 1;
25349 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25350 by DEP_INSN and nothing else set by DEP_INSN. */
25352 static bool
25353 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25355 rtx set, set2;
25357 /* Simplify the test for uninteresting insns. */
25358 if (insn_type != TYPE_SETCC
25359 && insn_type != TYPE_ICMOV
25360 && insn_type != TYPE_FCMOV
25361 && insn_type != TYPE_IBR)
25362 return false;
25364 if ((set = single_set (dep_insn)) != 0)
25366 set = SET_DEST (set);
25367 set2 = NULL_RTX;
25369 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25370 && XVECLEN (PATTERN (dep_insn), 0) == 2
25371 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25372 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25374 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25375 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25377 else
25378 return false;
25380 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25381 return false;
25383 /* This test is true if the dependent insn reads the flags but
25384 not any other potentially set register. */
25385 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25386 return false;
25388 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25389 return false;
25391 return true;
25394 /* Return true iff USE_INSN has a memory address with operands set by
25395 SET_INSN. */
25397 bool
25398 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25400 int i;
25401 extract_insn_cached (use_insn);
25402 for (i = recog_data.n_operands - 1; i >= 0; --i)
25403 if (MEM_P (recog_data.operand[i]))
25405 rtx addr = XEXP (recog_data.operand[i], 0);
25406 return modified_in_p (addr, set_insn) != 0;
25408 return false;
25411 /* Helper function for exact_store_load_dependency.
25412 Return true if addr is found in insn. */
25413 static bool
25414 exact_dependency_1 (rtx addr, rtx insn)
25416 enum rtx_code code;
25417 const char *format_ptr;
25418 int i, j;
25420 code = GET_CODE (insn);
25421 switch (code)
25423 case MEM:
25424 if (rtx_equal_p (addr, insn))
25425 return true;
25426 break;
25427 case REG:
25428 CASE_CONST_ANY:
25429 case SYMBOL_REF:
25430 case CODE_LABEL:
25431 case PC:
25432 case CC0:
25433 case EXPR_LIST:
25434 return false;
25435 default:
25436 break;
25439 format_ptr = GET_RTX_FORMAT (code);
25440 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25442 switch (*format_ptr++)
25444 case 'e':
25445 if (exact_dependency_1 (addr, XEXP (insn, i)))
25446 return true;
25447 break;
25448 case 'E':
25449 for (j = 0; j < XVECLEN (insn, i); j++)
25450 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25451 return true;
25452 break;
25455 return false;
25458 /* Return true if there is an exact dependency between a store and a load,
25459 i.e. the same memory address is used in both. */
25460 static bool
25461 exact_store_load_dependency (rtx store, rtx load)
25463 rtx set1, set2;
25465 set1 = single_set (store);
25466 if (!set1)
25467 return false;
25468 if (!MEM_P (SET_DEST (set1)))
25469 return false;
25470 set2 = single_set (load);
25471 if (!set2)
25472 return false;
25473 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25474 return true;
25475 return false;
25478 static int
25479 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25481 enum attr_type insn_type, dep_insn_type;
25482 enum attr_memory memory;
25483 rtx set, set2;
25484 int dep_insn_code_number;
25486 /* Anti and output dependencies have zero cost on all CPUs. */
25487 if (REG_NOTE_KIND (link) != 0)
25488 return 0;
25490 dep_insn_code_number = recog_memoized (dep_insn);
25492 /* If we can't recognize the insns, we can't really do anything. */
25493 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25494 return cost;
25496 insn_type = get_attr_type (insn);
25497 dep_insn_type = get_attr_type (dep_insn);
25499 switch (ix86_tune)
25501 case PROCESSOR_PENTIUM:
25502 /* Address Generation Interlock adds a cycle of latency. */
25503 if (insn_type == TYPE_LEA)
25505 rtx addr = PATTERN (insn);
25507 if (GET_CODE (addr) == PARALLEL)
25508 addr = XVECEXP (addr, 0, 0);
25510 gcc_assert (GET_CODE (addr) == SET);
25512 addr = SET_SRC (addr);
25513 if (modified_in_p (addr, dep_insn))
25514 cost += 1;
25516 else if (ix86_agi_dependent (dep_insn, insn))
25517 cost += 1;
25519 /* ??? Compares pair with jump/setcc. */
25520 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25521 cost = 0;
25523 /* Floating point stores require value to be ready one cycle earlier. */
25524 if (insn_type == TYPE_FMOV
25525 && get_attr_memory (insn) == MEMORY_STORE
25526 && !ix86_agi_dependent (dep_insn, insn))
25527 cost += 1;
25528 break;
25530 case PROCESSOR_PENTIUMPRO:
25531 /* INT->FP conversion is expensive. */
25532 if (get_attr_fp_int_src (dep_insn))
25533 cost += 5;
25535 /* There is one cycle extra latency between an FP op and a store. */
25536 if (insn_type == TYPE_FMOV
25537 && (set = single_set (dep_insn)) != NULL_RTX
25538 && (set2 = single_set (insn)) != NULL_RTX
25539 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25540 && MEM_P (SET_DEST (set2)))
25541 cost += 1;
25543 memory = get_attr_memory (insn);
25545 /* The reorder buffer can hide the latency of a load by executing it
25546 in parallel with the previous instruction, provided the previous
25547 instruction is not needed to compute the address. */
25548 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25549 && !ix86_agi_dependent (dep_insn, insn))
25551 /* Claim moves take one cycle, as the core can issue one load
25552 at a time and the next load can start a cycle later. */
25553 if (dep_insn_type == TYPE_IMOV
25554 || dep_insn_type == TYPE_FMOV)
25555 cost = 1;
25556 else if (cost > 1)
25557 cost--;
25559 break;
25561 case PROCESSOR_K6:
25562 /* The esp dependency is resolved before
25563 the instruction is really finished. */
25564 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25565 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25566 return 1;
25568 /* INT->FP conversion is expensive. */
25569 if (get_attr_fp_int_src (dep_insn))
25570 cost += 5;
25572 memory = get_attr_memory (insn);
25574 /* The reorder buffer can hide the latency of a load by executing it
25575 in parallel with the previous instruction, provided the previous
25576 instruction is not needed to compute the address. */
25577 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25578 && !ix86_agi_dependent (dep_insn, insn))
25580 /* Claim moves take one cycle, as the core can issue one load
25581 at a time and the next load can start a cycle later. */
25582 if (dep_insn_type == TYPE_IMOV
25583 || dep_insn_type == TYPE_FMOV)
25584 cost = 1;
25585 else if (cost > 2)
25586 cost -= 2;
25587 else
25588 cost = 1;
25590 break;
25592 case PROCESSOR_AMDFAM10:
25593 case PROCESSOR_BDVER1:
25594 case PROCESSOR_BDVER2:
25595 case PROCESSOR_BDVER3:
25596 case PROCESSOR_BDVER4:
25597 case PROCESSOR_BTVER1:
25598 case PROCESSOR_BTVER2:
25599 case PROCESSOR_GENERIC:
25600 /* The stack engine allows push and pop instructions to execute in parallel. */
25601 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25602 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25603 return 0;
25604 /* FALLTHRU */
25606 case PROCESSOR_ATHLON:
25607 case PROCESSOR_K8:
25608 memory = get_attr_memory (insn);
25610 /* The reorder buffer can hide the latency of a load by executing it
25611 in parallel with the previous instruction, provided the previous
25612 instruction is not needed to compute the address. */
25613 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25614 && !ix86_agi_dependent (dep_insn, insn))
25616 enum attr_unit unit = get_attr_unit (insn);
25617 int loadcost = 3;
25619 /* Because of the difference between the length of integer and
25620 floating unit pipeline preparation stages, the memory operands
25621 for floating point are cheaper.
25623 ??? For Athlon the difference is most probably 2. */
25624 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25625 loadcost = 3;
25626 else
25627 loadcost = TARGET_ATHLON ? 2 : 0;
25629 if (cost >= loadcost)
25630 cost -= loadcost;
25631 else
25632 cost = 0;
25634 break;
25636 case PROCESSOR_CORE2:
25637 case PROCESSOR_NEHALEM:
25638 case PROCESSOR_SANDYBRIDGE:
25639 case PROCESSOR_HASWELL:
25640 /* The stack engine allows push and pop instructions to execute in parallel. */
25641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25643 return 0;
25645 memory = get_attr_memory (insn);
25647 /* The reorder buffer can hide the latency of a load by executing it
25648 in parallel with the previous instruction, provided the previous
25649 instruction is not needed to compute the address. */
25650 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25651 && !ix86_agi_dependent (dep_insn, insn))
25653 if (cost >= 4)
25654 cost -= 4;
25655 else
25656 cost = 0;
25658 break;
25660 case PROCESSOR_SILVERMONT:
25661 case PROCESSOR_INTEL:
25662 if (!reload_completed)
25663 return cost;
25665 /* Increase cost of integer loads. */
25666 memory = get_attr_memory (dep_insn);
25667 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25669 enum attr_unit unit = get_attr_unit (dep_insn);
25670 if (unit == UNIT_INTEGER && cost == 1)
25672 if (memory == MEMORY_LOAD)
25673 cost = 3;
25674 else
25676 /* Increase the cost of ld/st for short int types only
25677 because of the store-forwarding issue. */
25678 rtx set = single_set (dep_insn);
25679 if (set && (GET_MODE (SET_DEST (set)) == QImode
25680 || GET_MODE (SET_DEST (set)) == HImode))
25682 /* Increase the cost of the store/load pair if an exact
25683 dependence exists and INSN is a load. */
25684 enum attr_memory insn_memory = get_attr_memory (insn);
25685 if (insn_memory == MEMORY_LOAD
25686 && exact_store_load_dependency (dep_insn, insn))
25687 cost = 3;
25693 default:
25694 break;
25697 return cost;
25700 /* How many alternative schedules to try. This should be as wide as the
25701 scheduling freedom in the DFA, but no wider. Making this value too
25702 large results in extra work for the scheduler. */
25704 static int
25705 ia32_multipass_dfa_lookahead (void)
25707 switch (ix86_tune)
25709 case PROCESSOR_PENTIUM:
25710 return 2;
25712 case PROCESSOR_PENTIUMPRO:
25713 case PROCESSOR_K6:
25714 return 1;
25716 case PROCESSOR_BDVER1:
25717 case PROCESSOR_BDVER2:
25718 case PROCESSOR_BDVER3:
25719 case PROCESSOR_BDVER4:
25720 /* We use lookahead value 4 for BD both before and after reload
25721 schedules. The plan is to use value 8 for -O3. */
25722 return 4;
25724 case PROCESSOR_CORE2:
25725 case PROCESSOR_NEHALEM:
25726 case PROCESSOR_SANDYBRIDGE:
25727 case PROCESSOR_HASWELL:
25728 case PROCESSOR_BONNELL:
25729 case PROCESSOR_SILVERMONT:
25730 case PROCESSOR_INTEL:
25731 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25732 as the number of instructions that can be executed in a cycle, i.e.,
25733 issue_rate. I wonder why tuning for many CPUs does not do this. */
25734 if (reload_completed)
25735 return ix86_issue_rate ();
25736 /* Don't use lookahead for pre-reload schedule to save compile time. */
25737 return 0;
25739 default:
25740 return 0;
25744 /* Return true if target platform supports macro-fusion. */
25746 static bool
25747 ix86_macro_fusion_p ()
25749 return TARGET_FUSE_CMP_AND_BRANCH;
25752 /* Check whether the current microarchitecture supports macro fusion
25753 for the insn pair "CONDGEN + CONDJMP". Refer to the
25754 "Intel Architectures Optimization Reference Manual". */
25756 static bool
25757 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25759 rtx src, dest;
25760 rtx single_set = single_set (condgen);
25761 enum rtx_code ccode;
25762 rtx compare_set = NULL_RTX, test_if, cond;
25763 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25765 if (get_attr_type (condgen) != TYPE_TEST
25766 && get_attr_type (condgen) != TYPE_ICMP
25767 && get_attr_type (condgen) != TYPE_INCDEC
25768 && get_attr_type (condgen) != TYPE_ALU)
25769 return false;
25771 if (single_set == NULL_RTX
25772 && !TARGET_FUSE_ALU_AND_BRANCH)
25773 return false;
25775 if (single_set != NULL_RTX)
25776 compare_set = single_set;
25777 else
25779 int i;
25780 rtx pat = PATTERN (condgen);
25781 for (i = 0; i < XVECLEN (pat, 0); i++)
25782 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25784 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25785 if (GET_CODE (set_src) == COMPARE)
25786 compare_set = XVECEXP (pat, 0, i);
25787 else
25788 alu_set = XVECEXP (pat, 0, i);
25791 if (compare_set == NULL_RTX)
25792 return false;
25793 src = SET_SRC (compare_set);
25794 if (GET_CODE (src) != COMPARE)
25795 return false;
25797 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25798 supported. */
25799 if ((MEM_P (XEXP (src, 0))
25800 && CONST_INT_P (XEXP (src, 1)))
25801 || (MEM_P (XEXP (src, 1))
25802 && CONST_INT_P (XEXP (src, 0))))
25803 return false;
25805 /* No fusion for RIP-relative address. */
25806 if (MEM_P (XEXP (src, 0)))
25807 addr = XEXP (XEXP (src, 0), 0);
25808 else if (MEM_P (XEXP (src, 1)))
25809 addr = XEXP (XEXP (src, 1), 0);
25811 if (addr) {
25812 ix86_address parts;
25813 int ok = ix86_decompose_address (addr, &parts);
25814 gcc_assert (ok);
25816 if (rip_relative_addr_p (&parts))
25817 return false;
25820 test_if = SET_SRC (pc_set (condjmp));
25821 cond = XEXP (test_if, 0);
25822 ccode = GET_CODE (cond);
25823 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25824 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25825 && (ccode == GE
25826 || ccode == GT
25827 || ccode == LE
25828 || ccode == LT))
25829 return false;
25831 /* Return true for TYPE_TEST and TYPE_ICMP. */
25832 if (get_attr_type (condgen) == TYPE_TEST
25833 || get_attr_type (condgen) == TYPE_ICMP)
25834 return true;
25836 /* What follows handles the macro-fusion case for ALU + jmp. */
25837 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25838 return false;
25840 /* No fusion for alu op with memory destination operand. */
25841 dest = SET_DEST (alu_set);
25842 if (MEM_P (dest))
25843 return false;
25845 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25846 supported. */
25847 if (get_attr_type (condgen) == TYPE_INCDEC
25848 && (ccode == GEU
25849 || ccode == GTU
25850 || ccode == LEU
25851 || ccode == LTU))
25852 return false;
25854 return true;
25857 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25858 execution. It is applied if
25859 (1) an IMUL instruction is at the top of the list;
25860 (2) there is exactly one producer of an independent IMUL instruction in
25861 the ready list.
25862 Return the index of the IMUL producer if it was found, and -1 otherwise. */
25863 static int
25864 do_reorder_for_imul (rtx *ready, int n_ready)
25866 rtx insn, set, insn1, insn2;
25867 sd_iterator_def sd_it;
25868 dep_t dep;
25869 int index = -1;
25870 int i;
25872 if (!TARGET_BONNELL)
25873 return index;
25875 /* Check that IMUL instruction is on the top of ready list. */
25876 insn = ready[n_ready - 1];
25877 set = single_set (insn);
25878 if (!set)
25879 return index;
25880 if (!(GET_CODE (SET_SRC (set)) == MULT
25881 && GET_MODE (SET_SRC (set)) == SImode))
25882 return index;
25884 /* Search for producer of independent IMUL instruction. */
25885 for (i = n_ready - 2; i >= 0; i--)
25887 insn = ready[i];
25888 if (!NONDEBUG_INSN_P (insn))
25889 continue;
25890 /* Skip IMUL instruction. */
25891 insn2 = PATTERN (insn);
25892 if (GET_CODE (insn2) == PARALLEL)
25893 insn2 = XVECEXP (insn2, 0, 0);
25894 if (GET_CODE (insn2) == SET
25895 && GET_CODE (SET_SRC (insn2)) == MULT
25896 && GET_MODE (SET_SRC (insn2)) == SImode)
25897 continue;
25899 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25901 rtx con;
25902 con = DEP_CON (dep);
25903 if (!NONDEBUG_INSN_P (con))
25904 continue;
25905 insn1 = PATTERN (con);
25906 if (GET_CODE (insn1) == PARALLEL)
25907 insn1 = XVECEXP (insn1, 0, 0);
25909 if (GET_CODE (insn1) == SET
25910 && GET_CODE (SET_SRC (insn1)) == MULT
25911 && GET_MODE (SET_SRC (insn1)) == SImode)
25913 sd_iterator_def sd_it1;
25914 dep_t dep1;
25915 /* Check if there is no other dependee for IMUL. */
25916 index = i;
25917 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25919 rtx pro;
25920 pro = DEP_PRO (dep1);
25921 if (!NONDEBUG_INSN_P (pro))
25922 continue;
25923 if (pro != insn)
25924 index = -1;
25926 if (index >= 0)
25927 break;
25930 if (index >= 0)
25931 break;
25933 return index;
25936 /* Try to find the best candidate for the top of the ready list if two insns
25937 have the same priority - the best candidate is the one whose dependees
25938 were scheduled earlier. Applied to Silvermont only.
25939 Return true if the top 2 insns must be interchanged. */
25940 static bool
25941 swap_top_of_ready_list (rtx *ready, int n_ready)
25943 rtx top = ready[n_ready - 1];
25944 rtx next = ready[n_ready - 2];
25945 rtx set;
25946 sd_iterator_def sd_it;
25947 dep_t dep;
25948 int clock1 = -1;
25949 int clock2 = -1;
25950 #define INSN_TICK(INSN) (HID (INSN)->tick)
25952 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25953 return false;
25955 if (!NONDEBUG_INSN_P (top))
25956 return false;
25957 if (!NONJUMP_INSN_P (top))
25958 return false;
25959 if (!NONDEBUG_INSN_P (next))
25960 return false;
25961 if (!NONJUMP_INSN_P (next))
25962 return false;
25963 set = single_set (top);
25964 if (!set)
25965 return false;
25966 set = single_set (next);
25967 if (!set)
25968 return false;
25970 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25972 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25973 return false;
25974 /* Determine the winner more precisely. */
25975 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25977 rtx pro;
25978 pro = DEP_PRO (dep);
25979 if (!NONDEBUG_INSN_P (pro))
25980 continue;
25981 if (INSN_TICK (pro) > clock1)
25982 clock1 = INSN_TICK (pro);
25984 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25986 rtx pro;
25987 pro = DEP_PRO (dep);
25988 if (!NONDEBUG_INSN_P (pro))
25989 continue;
25990 if (INSN_TICK (pro) > clock2)
25991 clock2 = INSN_TICK (pro);
25994 if (clock1 == clock2)
25996 /* Determine the winner - a load must win. */
25997 enum attr_memory memory1, memory2;
25998 memory1 = get_attr_memory (top);
25999 memory2 = get_attr_memory (next);
26000 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26001 return true;
26003 return (bool) (clock2 < clock1);
26005 return false;
26006 #undef INSN_TICK
26009 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26010 Return the issue rate. */
26011 static int
26012 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26013 int clock_var)
26015 int issue_rate = -1;
26016 int n_ready = *pn_ready;
26017 int i;
26018 rtx insn;
26019 int index = -1;
26021 /* Set up issue rate. */
26022 issue_rate = ix86_issue_rate ();
26024 /* Do reordering for BONNELL/SILVERMONT only. */
26025 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26026 return issue_rate;
26028 /* Nothing to do if ready list contains only 1 instruction. */
26029 if (n_ready <= 1)
26030 return issue_rate;
26032 /* Do reordering for the post-reload scheduler only. */
26033 if (!reload_completed)
26034 return issue_rate;
26036 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26038 if (sched_verbose > 1)
26039 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26040 INSN_UID (ready[index]));
26042 /* Put IMUL producer (ready[index]) at the top of ready list. */
26043 insn = ready[index];
26044 for (i = index; i < n_ready - 1; i++)
26045 ready[i] = ready[i + 1];
26046 ready[n_ready - 1] = insn;
26047 return issue_rate;
26049 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26051 if (sched_verbose > 1)
26052 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26053 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26054 /* Swap 2 top elements of ready list. */
26055 insn = ready[n_ready - 1];
26056 ready[n_ready - 1] = ready[n_ready - 2];
26057 ready[n_ready - 2] = insn;
26059 return issue_rate;
26062 static bool
26063 ix86_class_likely_spilled_p (reg_class_t);
26065 /* Return true if the lhs of INSN is a HW function argument register, and set
26066 *is_spilled to true if it is a likely-spilled HW register. */
26067 static bool
26068 insn_is_function_arg (rtx insn, bool* is_spilled)
26070 rtx dst;
26072 if (!NONDEBUG_INSN_P (insn))
26073 return false;
26074 /* Call instructions are not movable, so ignore them. */
26075 if (CALL_P (insn))
26076 return false;
26077 insn = PATTERN (insn);
26078 if (GET_CODE (insn) == PARALLEL)
26079 insn = XVECEXP (insn, 0, 0);
26080 if (GET_CODE (insn) != SET)
26081 return false;
26082 dst = SET_DEST (insn);
26083 if (REG_P (dst) && HARD_REGISTER_P (dst)
26084 && ix86_function_arg_regno_p (REGNO (dst)))
26086 /* Is it likely spilled HW register? */
26087 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26088 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26089 *is_spilled = true;
26090 return true;
26092 return false;
26095 /* Add output dependencies for a chain of adjacent function arguments, but only
26096 if there is a move to a likely-spilled HW register. Return the first argument
26097 if at least one dependence was added, or NULL otherwise. */
26098 static rtx
26099 add_parameter_dependencies (rtx call, rtx head)
26101 rtx insn;
26102 rtx last = call;
26103 rtx first_arg = NULL;
26104 bool is_spilled = false;
26106 head = PREV_INSN (head);
26108 /* Find the argument-passing instruction nearest to the call. */
26109 while (true)
26111 last = PREV_INSN (last);
26112 if (last == head)
26113 return NULL;
26114 if (!NONDEBUG_INSN_P (last))
26115 continue;
26116 if (insn_is_function_arg (last, &is_spilled))
26117 break;
26118 return NULL;
26121 first_arg = last;
26122 while (true)
26124 insn = PREV_INSN (last);
26125 if (!INSN_P (insn))
26126 break;
26127 if (insn == head)
26128 break;
26129 if (!NONDEBUG_INSN_P (insn))
26131 last = insn;
26132 continue;
26134 if (insn_is_function_arg (insn, &is_spilled))
26136 /* Add an output dependence between two function arguments if the chain
26137 of output arguments contains likely-spilled HW registers. */
26138 if (is_spilled)
26139 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26140 first_arg = last = insn;
26142 else
26143 break;
26145 if (!is_spilled)
26146 return NULL;
26147 return first_arg;
26150 /* Add output or anti dependency from insn to first_arg to restrict its code
26151 motion. */
26152 static void
26153 avoid_func_arg_motion (rtx first_arg, rtx insn)
26155 rtx set;
26156 rtx tmp;
26158 set = single_set (insn);
26159 if (!set)
26160 return;
26161 tmp = SET_DEST (set);
26162 if (REG_P (tmp))
26164 /* Add output dependency to the first function argument. */
26165 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26166 return;
26168 /* Add anti dependency. */
26169 add_dependence (first_arg, insn, REG_DEP_ANTI);
26172 /* Avoid cross-block motion of a function argument by adding a dependency
26173 from the first non-jump instruction in BB. */
26174 static void
26175 add_dependee_for_func_arg (rtx arg, basic_block bb)
26177 rtx insn = BB_END (bb);
26179 while (insn)
26181 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26183 rtx set = single_set (insn);
26184 if (set)
26186 avoid_func_arg_motion (arg, insn);
26187 return;
26190 if (insn == BB_HEAD (bb))
26191 return;
26192 insn = PREV_INSN (insn);
26196 /* Hook for pre-reload schedule - avoid motion of function arguments
26197 passed in likely spilled HW registers. */
26198 static void
26199 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26201 rtx insn;
26202 rtx first_arg = NULL;
26203 if (reload_completed)
26204 return;
26205 while (head != tail && DEBUG_INSN_P (head))
26206 head = NEXT_INSN (head);
26207 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26208 if (INSN_P (insn) && CALL_P (insn))
26210 first_arg = add_parameter_dependencies (insn, head);
26211 if (first_arg)
26213 /* Add a dependee for the first argument to predecessors, but only if the
26214 region contains more than one block. */
26215 basic_block bb = BLOCK_FOR_INSN (insn);
26216 int rgn = CONTAINING_RGN (bb->index);
26217 int nr_blks = RGN_NR_BLOCKS (rgn);
26218 /* Skip trivial regions and region head blocks that can have
26219 predecessors outside of region. */
26220 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26222 edge e;
26223 edge_iterator ei;
26224 /* Assume that region is SCC, i.e. all immediate predecessors
26225 of non-head block are in the same region. */
26226 FOR_EACH_EDGE (e, ei, bb->preds)
26228 /* Avoid creating loop-carried dependencies by
26229 using the topological ordering of the region. */
26230 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26231 add_dependee_for_func_arg (first_arg, e->src);
26234 insn = first_arg;
26235 if (insn == head)
26236 break;
26239 else if (first_arg)
26240 avoid_func_arg_motion (first_arg, insn);
26243 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26244 HW registers to maximum, to schedule them as soon as possible. These are
26245 moves from function argument registers at the top of the function entry
26246 and moves from function return value registers after call. */
26247 static int
26248 ix86_adjust_priority (rtx insn, int priority)
26250 rtx set;
26252 if (reload_completed)
26253 return priority;
26255 if (!NONDEBUG_INSN_P (insn))
26256 return priority;
26258 set = single_set (insn);
26259 if (set)
26261 rtx tmp = SET_SRC (set);
26262 if (REG_P (tmp)
26263 && HARD_REGISTER_P (tmp)
26264 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26265 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26266 return current_sched_info->sched_max_insns_priority;
26269 return priority;
26272 /* Model decoder of Core 2/i7.
26273 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26274 track the instruction fetch block boundaries and make sure that long
26275 (9+ bytes) instructions are assigned to D0. */
26277 /* Maximum length of an insn that can be handled by
26278 a secondary decoder unit. '8' for Core 2/i7. */
26279 static int core2i7_secondary_decoder_max_insn_size;
26281 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26282 '16' for Core 2/i7. */
26283 static int core2i7_ifetch_block_size;
26285 /* Maximum number of instructions decoder can handle per cycle.
26286 '6' for Core 2/i7. */
26287 static int core2i7_ifetch_block_max_insns;
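/* Editorial sketch, not part of this file: how the three parameters above
   constrain issue in core2i7_first_cycle_multipass_filter_ready_try below.
   The function name is hypothetical; the constants mirror the Core 2/i7
   values set in ix86_sched_init_global.  */
static bool
sketch_decoder_can_issue (int insn_size, int ifetch_len, int ifetch_n_insns,
                          bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)  /* only decoder D0 takes long insns */
    return false;
  if (ifetch_len + insn_size > 16)           /* would not fit the ifetch block */
    return false;
  if (ifetch_n_insns + 1 > 6)                /* decoders already full this cycle */
    return false;
  return true;
}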
26289 typedef struct ix86_first_cycle_multipass_data_ *
26290 ix86_first_cycle_multipass_data_t;
26291 typedef const struct ix86_first_cycle_multipass_data_ *
26292 const_ix86_first_cycle_multipass_data_t;
26294 /* A variable to store target state across calls to max_issue within
26295 one cycle. */
26296 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26297 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26299 /* Initialize DATA. */
26300 static void
26301 core2i7_first_cycle_multipass_init (void *_data)
26303 ix86_first_cycle_multipass_data_t data
26304 = (ix86_first_cycle_multipass_data_t) _data;
26306 data->ifetch_block_len = 0;
26307 data->ifetch_block_n_insns = 0;
26308 data->ready_try_change = NULL;
26309 data->ready_try_change_size = 0;
26312 /* Advancing the cycle; reset ifetch block counts. */
26313 static void
26314 core2i7_dfa_post_advance_cycle (void)
26316 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26318 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26320 data->ifetch_block_len = 0;
26321 data->ifetch_block_n_insns = 0;
26324 static int min_insn_size (rtx);
26326 /* Filter out insns from ready_try that the core will not be able to issue
26327 on the current cycle due to decoder restrictions. */
26328 static void
26329 core2i7_first_cycle_multipass_filter_ready_try
26330 (const_ix86_first_cycle_multipass_data_t data,
26331 char *ready_try, int n_ready, bool first_cycle_insn_p)
26333 while (n_ready--)
26335 rtx insn;
26336 int insn_size;
26338 if (ready_try[n_ready])
26339 continue;
26341 insn = get_ready_element (n_ready);
26342 insn_size = min_insn_size (insn);
26344 if (/* If this is too long an insn for a secondary decoder ... */
26345 (!first_cycle_insn_p
26346 && insn_size > core2i7_secondary_decoder_max_insn_size)
26347 /* ... or it would not fit into the ifetch block ... */
26348 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26349 /* ... or the decoder is full already ... */
26350 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26351 /* ... mask the insn out. */
26353 ready_try[n_ready] = 1;
26355 if (data->ready_try_change)
26356 bitmap_set_bit (data->ready_try_change, n_ready);
26361 /* Prepare for a new round of multipass lookahead scheduling. */
26362 static void
26363 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26364 bool first_cycle_insn_p)
26366 ix86_first_cycle_multipass_data_t data
26367 = (ix86_first_cycle_multipass_data_t) _data;
26368 const_ix86_first_cycle_multipass_data_t prev_data
26369 = ix86_first_cycle_multipass_data;
26371 /* Restore the state from the end of the previous round. */
26372 data->ifetch_block_len = prev_data->ifetch_block_len;
26373 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26375 /* Filter instructions that cannot be issued on current cycle due to
26376 decoder restrictions. */
26377 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26378 first_cycle_insn_p);
26381 /* INSN is being issued in current solution. Account for its impact on
26382 the decoder model. */
26383 static void
26384 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26385 rtx insn, const void *_prev_data)
26387 ix86_first_cycle_multipass_data_t data
26388 = (ix86_first_cycle_multipass_data_t) _data;
26389 const_ix86_first_cycle_multipass_data_t prev_data
26390 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26392 int insn_size = min_insn_size (insn);
26394 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26395 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26396 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26397 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26399 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26400 if (!data->ready_try_change)
26402 data->ready_try_change = sbitmap_alloc (n_ready);
26403 data->ready_try_change_size = n_ready;
26405 else if (data->ready_try_change_size < n_ready)
26407 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26408 n_ready, 0);
26409 data->ready_try_change_size = n_ready;
26411 bitmap_clear (data->ready_try_change);
26413 /* Filter out insns from ready_try that the core will not be able to issue
26414 on the current cycle due to decoder restrictions. */
26415 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26416 false);
26419 /* Revert the effect on ready_try. */
26420 static void
26421 core2i7_first_cycle_multipass_backtrack (const void *_data,
26422 char *ready_try,
26423 int n_ready ATTRIBUTE_UNUSED)
26425 const_ix86_first_cycle_multipass_data_t data
26426 = (const_ix86_first_cycle_multipass_data_t) _data;
26427 unsigned int i = 0;
26428 sbitmap_iterator sbi;
26430 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26431 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26433 ready_try[i] = 0;
26437 /* Save the result of multipass lookahead scheduling for the next round. */
26438 static void
26439 core2i7_first_cycle_multipass_end (const void *_data)
26441 const_ix86_first_cycle_multipass_data_t data
26442 = (const_ix86_first_cycle_multipass_data_t) _data;
26443 ix86_first_cycle_multipass_data_t next_data
26444 = ix86_first_cycle_multipass_data;
26446 if (data != NULL)
26448 next_data->ifetch_block_len = data->ifetch_block_len;
26449 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26453 /* Deallocate target data. */
26454 static void
26455 core2i7_first_cycle_multipass_fini (void *_data)
26457 ix86_first_cycle_multipass_data_t data
26458 = (ix86_first_cycle_multipass_data_t) _data;
26460 if (data->ready_try_change)
26462 sbitmap_free (data->ready_try_change);
26463 data->ready_try_change = NULL;
26464 data->ready_try_change_size = 0;
26468 /* Prepare for scheduling pass. */
26469 static void
26470 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26471 int verbose ATTRIBUTE_UNUSED,
26472 int max_uid ATTRIBUTE_UNUSED)
26474 /* Install scheduling hooks for the current CPU. Some of these hooks are used
26475 in time-critical parts of the scheduler, so we only set them up when
26476 they are actually used. */
26477 switch (ix86_tune)
26479 case PROCESSOR_CORE2:
26480 case PROCESSOR_NEHALEM:
26481 case PROCESSOR_SANDYBRIDGE:
26482 case PROCESSOR_HASWELL:
26483 /* Do not perform multipass scheduling for the pre-reload schedule,
26484 to save compile time. */
26485 if (reload_completed)
26487 targetm.sched.dfa_post_advance_cycle
26488 = core2i7_dfa_post_advance_cycle;
26489 targetm.sched.first_cycle_multipass_init
26490 = core2i7_first_cycle_multipass_init;
26491 targetm.sched.first_cycle_multipass_begin
26492 = core2i7_first_cycle_multipass_begin;
26493 targetm.sched.first_cycle_multipass_issue
26494 = core2i7_first_cycle_multipass_issue;
26495 targetm.sched.first_cycle_multipass_backtrack
26496 = core2i7_first_cycle_multipass_backtrack;
26497 targetm.sched.first_cycle_multipass_end
26498 = core2i7_first_cycle_multipass_end;
26499 targetm.sched.first_cycle_multipass_fini
26500 = core2i7_first_cycle_multipass_fini;
26502 /* Set decoder parameters. */
26503 core2i7_secondary_decoder_max_insn_size = 8;
26504 core2i7_ifetch_block_size = 16;
26505 core2i7_ifetch_block_max_insns = 6;
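/* A rough sketch of what these parameters model (the exact limits are
   assumptions of this scheduling model, not architectural guarantees):
   the front end delivers a 16-byte instruction-fetch block per cycle,
   at most 6 instructions are taken from one such block, and an
   instruction longer than 8 bytes is assumed to be too large for the
   secondary decoders.  */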
26506 break;
26508 /* ... Fall through ... */
26509 default:
26510 targetm.sched.dfa_post_advance_cycle = NULL;
26511 targetm.sched.first_cycle_multipass_init = NULL;
26512 targetm.sched.first_cycle_multipass_begin = NULL;
26513 targetm.sched.first_cycle_multipass_issue = NULL;
26514 targetm.sched.first_cycle_multipass_backtrack = NULL;
26515 targetm.sched.first_cycle_multipass_end = NULL;
26516 targetm.sched.first_cycle_multipass_fini = NULL;
26517 break;
26522 /* Compute the alignment given to a constant that is being placed in memory.
26523 EXP is the constant and ALIGN is the alignment that the object would
26524 ordinarily have.
26525 The value of this function is used instead of that alignment to align
26526 the object. */
26528 int
26529 ix86_constant_alignment (tree exp, int align)
26531 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26532 || TREE_CODE (exp) == INTEGER_CST)
26534 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26535 return 64;
26536 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26537 return 128;
26539 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26540 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26541 return BITS_PER_WORD;
26543 return align;
26546 /* Compute the alignment for a static variable.
26547 TYPE is the data type, and ALIGN is the alignment that
26548 the object would ordinarily have. The value of this function is used
26549 instead of that alignment to align the object. */
26551 int
26552 ix86_data_alignment (tree type, int align, bool opt)
26554 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26555 for symbols from other compilation units or symbols that don't need
26556 to bind locally. In order to preserve some ABI compatibility with
26557 those compilers, ensure we don't decrease alignment from what we
26558 used to assume. */
26560 int max_align_compat
26561 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26563 /* A data structure equal to or greater than the size of a cache line
26564 (64 bytes in the Pentium 4 and other recent Intel processors, including
26565 processors based on the Intel Core microarchitecture) should be aligned
26566 so that its base address is a multiple of the cache line size. */
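/* Note that ix86_tune_cost->prefetch_block is expressed in bytes; the
   multiplication by 8 below converts it to bits, the unit in which
   ALIGN and the constants in this function are expressed.  */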
26568 int max_align
26569 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26571 if (max_align < BITS_PER_WORD)
26572 max_align = BITS_PER_WORD;
26574 if (opt
26575 && AGGREGATE_TYPE_P (type)
26576 && TYPE_SIZE (type)
26577 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26579 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26580 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26581 && align < max_align_compat)
26582 align = max_align_compat;
26583 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26584 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26585 && align < max_align)
26586 align = max_align;
26589 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26590 to a 16-byte boundary. */
26591 if (TARGET_64BIT)
26593 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26594 && TYPE_SIZE (type)
26595 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26596 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26597 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26598 return 128;
26601 if (!opt)
26602 return align;
26604 if (TREE_CODE (type) == ARRAY_TYPE)
26606 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26607 return 64;
26608 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26609 return 128;
26611 else if (TREE_CODE (type) == COMPLEX_TYPE)
26614 if (TYPE_MODE (type) == DCmode && align < 64)
26615 return 64;
26616 if ((TYPE_MODE (type) == XCmode
26617 || TYPE_MODE (type) == TCmode) && align < 128)
26618 return 128;
26620 else if ((TREE_CODE (type) == RECORD_TYPE
26621 || TREE_CODE (type) == UNION_TYPE
26622 || TREE_CODE (type) == QUAL_UNION_TYPE)
26623 && TYPE_FIELDS (type))
26625 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26626 return 64;
26627 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26628 return 128;
26630 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26631 || TREE_CODE (type) == INTEGER_TYPE)
26633 if (TYPE_MODE (type) == DFmode && align < 64)
26634 return 64;
26635 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26636 return 128;
26639 return align;
26642 /* Compute the alignment for a local variable or a stack slot. EXP is
26643 the data type or decl itself, MODE is the widest mode available and
26644 ALIGN is the alignment that the object would ordinarily have. The
26645 value of this macro is used instead of that alignment to align the
26646 object. */
26648 unsigned int
26649 ix86_local_alignment (tree exp, enum machine_mode mode,
26650 unsigned int align)
26652 tree type, decl;
26654 if (exp && DECL_P (exp))
26656 type = TREE_TYPE (exp);
26657 decl = exp;
26659 else
26661 type = exp;
26662 decl = NULL;
26665 /* Don't do dynamic stack realignment for long long objects with
26666 -mpreferred-stack-boundary=2. */
26667 if (!TARGET_64BIT
26668 && align == 64
26669 && ix86_preferred_stack_boundary < 64
26670 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26671 && (!type || !TYPE_USER_ALIGN (type))
26672 && (!decl || !DECL_USER_ALIGN (decl)))
26673 align = 32;
26675 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26676 register in MODE. We return the larger of the XF and DF
26677 alignments. */
26678 if (!type)
26680 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26681 align = GET_MODE_ALIGNMENT (DFmode);
26682 return align;
26685 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26686 to a 16-byte boundary. The exact wording is:
26688 An array uses the same alignment as its elements, except that a local or
26689 global array variable of length at least 16 bytes or
26690 a C99 variable-length array variable always has alignment of at least 16 bytes.
26692 This was added to allow use of aligned SSE instructions on arrays. The
26693 rule is meant for static storage (where the compiler cannot do the analysis
26694 by itself). We follow it for automatic variables only when convenient.
26695 We fully control everything in the function being compiled, and functions
26696 from other units cannot rely on the alignment.
26698 Exclude the va_list type; it is the common case of a local array where
26699 we cannot benefit from the alignment.
26701 TODO: Probably one should optimize for size only when the variable does not escape. */
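/* Illustrative example (not taken from the sources): on x86-64 with
   SSE enabled and when optimizing for speed, a local

     double buf[4];   (an aggregate of 32 bytes, i.e. at least 16 bytes)

   is given 128-bit alignment by the check below, so vectorized
   accesses to it can use aligned SSE loads and stores, whereas a
   local array smaller than 16 bytes keeps the alignment computed by
   the later per-type checks.  */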
26702 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26703 && TARGET_SSE)
26705 if (AGGREGATE_TYPE_P (type)
26706 && (va_list_type_node == NULL_TREE
26707 || (TYPE_MAIN_VARIANT (type)
26708 != TYPE_MAIN_VARIANT (va_list_type_node)))
26709 && TYPE_SIZE (type)
26710 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26711 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26712 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26713 return 128;
26715 if (TREE_CODE (type) == ARRAY_TYPE)
26717 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26718 return 64;
26719 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26720 return 128;
26722 else if (TREE_CODE (type) == COMPLEX_TYPE)
26724 if (TYPE_MODE (type) == DCmode && align < 64)
26725 return 64;
26726 if ((TYPE_MODE (type) == XCmode
26727 || TYPE_MODE (type) == TCmode) && align < 128)
26728 return 128;
26730 else if ((TREE_CODE (type) == RECORD_TYPE
26731 || TREE_CODE (type) == UNION_TYPE
26732 || TREE_CODE (type) == QUAL_UNION_TYPE)
26733 && TYPE_FIELDS (type))
26735 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26736 return 64;
26737 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26738 return 128;
26740 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26741 || TREE_CODE (type) == INTEGER_TYPE)
26744 if (TYPE_MODE (type) == DFmode && align < 64)
26745 return 64;
26746 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26747 return 128;
26749 return align;
26752 /* Compute the minimum required alignment for dynamic stack realignment
26753 purposes for a local variable, parameter or a stack slot. EXP is
26754 the data type or decl itself, MODE is its mode and ALIGN is the
26755 alignment that the object would ordinarily have. */
26757 unsigned int
26758 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26759 unsigned int align)
26761 tree type, decl;
26763 if (exp && DECL_P (exp))
26765 type = TREE_TYPE (exp);
26766 decl = exp;
26768 else
26770 type = exp;
26771 decl = NULL;
26774 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26775 return align;
26777 /* Don't do dynamic stack realignment for long long objects with
26778 -mpreferred-stack-boundary=2. */
26779 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26780 && (!type || !TYPE_USER_ALIGN (type))
26781 && (!decl || !DECL_USER_ALIGN (decl)))
26782 return 32;
26784 return align;
26787 /* Find a location for the static chain incoming to a nested function.
26788 This is a register, unless all free registers are used by arguments. */
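/* Summary of the choices made below, for reference: 64-bit targets
   always use R10; 32-bit targets default to ECX, switch to EAX for
   fastcall/thiscall (whose argument registers include ECX), and for
   regparm(3) functions fall back to passing the chain on the stack,
   with ESI used via an alternate entry point.  */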
26790 static rtx
26791 ix86_static_chain (const_tree fndecl, bool incoming_p)
26793 unsigned regno;
26795 if (!DECL_STATIC_CHAIN (fndecl))
26796 return NULL;
26798 if (TARGET_64BIT)
26800 /* We always use R10 in 64-bit mode. */
26801 regno = R10_REG;
26803 else
26805 tree fntype;
26806 unsigned int ccvt;
26808 /* By default in 32-bit mode we use ECX to pass the static chain. */
26809 regno = CX_REG;
26811 fntype = TREE_TYPE (fndecl);
26812 ccvt = ix86_get_callcvt (fntype);
26813 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26815 /* Fastcall functions use ecx/edx for arguments, which leaves
26816 us with EAX for the static chain.
26817 Thiscall functions use ecx for arguments, which also
26818 leaves us with EAX for the static chain. */
26819 regno = AX_REG;
26821 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26823 /* Thiscall functions use ecx for arguments, which leaves
26824 us with EAX and EDX for the static chain.
26825 We are using for abi-compatibility EAX. */
26826 regno = AX_REG;
26828 else if (ix86_function_regparm (fntype, fndecl) == 3)
26830 /* For regparm 3, we have no free call-clobbered registers in
26831 which to store the static chain. In order to implement this,
26832 we have the trampoline push the static chain to the stack.
26833 However, we can't push a value below the return address when
26834 we call the nested function directly, so we have to use an
26835 alternate entry point. For this we use ESI, and have the
26836 alternate entry point push ESI, so that things appear the
26837 same once we're executing the nested function. */
26838 if (incoming_p)
26840 if (fndecl == current_function_decl)
26841 ix86_static_chain_on_stack = true;
26842 return gen_frame_mem (SImode,
26843 plus_constant (Pmode,
26844 arg_pointer_rtx, -8));
26846 regno = SI_REG;
26850 return gen_rtx_REG (Pmode, regno);
26853 /* Emit RTL insns to initialize the variable parts of a trampoline.
26854 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26855 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26856 to be passed to the target function. */
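/* For reference, a sketch of the byte sequences emitted below (the
   immediates are filled in when the trampoline is initialized):

     64-bit trampoline:
       49 bb <imm64>   movabs $FNADDR, %r11   (or 41 bb <imm32>, movl)
       49 ba <imm64>   movabs $CHAIN,  %r10   (or 41 ba <imm32>, movl)
       49 ff e3        jmp    *%r11
       90              nop  (pads the last write to a full SImode store)

     32-bit trampoline:
       b8/b9 <imm32>   movl $CHAIN, %eax/%ecx   (or 68 <imm32>, pushl)
       e9 <rel32>      jmp  <target>  */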
26858 static void
26859 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26861 rtx mem, fnaddr;
26862 int opcode;
26863 int offset = 0;
26865 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26867 if (TARGET_64BIT)
26869 int size;
26871 /* Load the function address into r11. Try to load the address
26872 using the shorter movl instead of movabs. We may want to support
26873 movq for kernel mode, but the kernel does not use trampolines at
26874 the moment. FNADDR is a 32-bit address and may not be in
26875 DImode when ptr_mode == SImode. Always use movl in this
26876 case. */
26877 if (ptr_mode == SImode
26878 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26880 fnaddr = copy_addr_to_reg (fnaddr);
26882 mem = adjust_address (m_tramp, HImode, offset);
26883 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26885 mem = adjust_address (m_tramp, SImode, offset + 2);
26886 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26887 offset += 6;
26889 else
26891 mem = adjust_address (m_tramp, HImode, offset);
26892 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26894 mem = adjust_address (m_tramp, DImode, offset + 2);
26895 emit_move_insn (mem, fnaddr);
26896 offset += 10;
26899 /* Load the static chain into r10 using movabs. Use the shorter movl
26900 instead of movabs when ptr_mode == SImode. */
26901 if (ptr_mode == SImode)
26903 opcode = 0xba41;
26904 size = 6;
26906 else
26908 opcode = 0xba49;
26909 size = 10;
26912 mem = adjust_address (m_tramp, HImode, offset);
26913 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26915 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26916 emit_move_insn (mem, chain_value);
26917 offset += size;
26919 /* Jump to r11; the last (unused) byte is a nop, only there to
26920 pad the write out to a single 32-bit store. */
26921 mem = adjust_address (m_tramp, SImode, offset);
26922 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26923 offset += 4;
26925 else
26927 rtx disp, chain;
26929 /* Depending on the static chain location, either load a register
26930 with a constant, or push the constant to the stack. All of the
26931 instructions are the same size. */
26932 chain = ix86_static_chain (fndecl, true);
26933 if (REG_P (chain))
26935 switch (REGNO (chain))
26937 case AX_REG:
26938 opcode = 0xb8; break;
26939 case CX_REG:
26940 opcode = 0xb9; break;
26941 default:
26942 gcc_unreachable ();
26945 else
26946 opcode = 0x68;
26948 mem = adjust_address (m_tramp, QImode, offset);
26949 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26951 mem = adjust_address (m_tramp, SImode, offset + 1);
26952 emit_move_insn (mem, chain_value);
26953 offset += 5;
26955 mem = adjust_address (m_tramp, QImode, offset);
26956 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26958 mem = adjust_address (m_tramp, SImode, offset + 1);
26960 /* Compute the offset from the end of the jmp to the target function.
26961 When the trampoline stores the static chain on
26962 the stack, we need to skip the first insn, which pushes the
26963 (call-saved) register static chain; this push is 1 byte. */
26964 offset += 5;
26965 disp = expand_binop (SImode, sub_optab, fnaddr,
26966 plus_constant (Pmode, XEXP (m_tramp, 0),
26967 offset - (MEM_P (chain) ? 1 : 0)),
26968 NULL_RTX, 1, OPTAB_DIRECT);
26969 emit_move_insn (mem, disp);
26972 gcc_assert (offset <= TRAMPOLINE_SIZE);
26974 #ifdef HAVE_ENABLE_EXECUTE_STACK
26975 #ifdef CHECK_EXECUTE_STACK_ENABLED
26976 if (CHECK_EXECUTE_STACK_ENABLED)
26977 #endif
26978 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26979 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26980 #endif
26983 /* The following file contains several enumerations and data structures
26984 built from the definitions in i386-builtin-types.def. */
26986 #include "i386-builtin-types.inc"
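/* (The .inc file is generated at build time from i386-builtin-types.def,
   normally via the i386-builtin-types.awk script in this directory.)  */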
26988 /* Table for the ix86 builtin non-function types. */
26989 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
26991 /* Retrieve an element from the above table, building some of
26992 the types lazily. */
26994 static tree
26995 ix86_get_builtin_type (enum ix86_builtin_type tcode)
26997 unsigned int index;
26998 tree type, itype;
27000 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27002 type = ix86_builtin_type_tab[(int) tcode];
27003 if (type != NULL)
27004 return type;
27006 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27007 if (tcode <= IX86_BT_LAST_VECT)
27009 enum machine_mode mode;
27011 index = tcode - IX86_BT_LAST_PRIM - 1;
27012 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27013 mode = ix86_builtin_type_vect_mode[index];
27015 type = build_vector_type_for_mode (itype, mode);
27017 else
27019 int quals;
27021 index = tcode - IX86_BT_LAST_VECT - 1;
27022 if (tcode <= IX86_BT_LAST_PTR)
27023 quals = TYPE_UNQUALIFIED;
27024 else
27025 quals = TYPE_QUAL_CONST;
27027 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27028 if (quals != TYPE_UNQUALIFIED)
27029 itype = build_qualified_type (itype, quals);
27031 type = build_pointer_type (itype);
27034 ix86_builtin_type_tab[(int) tcode] = type;
27035 return type;
27038 /* Table for the ix86 builtin function types. */
27039 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27041 /* Retrieve an element from the above table, building some of
27042 the types lazily. */
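/* Layout assumed by the code below, as produced in the generated tables:
   ix86_builtin_func_start[tcode] indexes into ix86_builtin_func_args,
   where the first entry is the return type and the entries up to
   ix86_builtin_func_start[tcode + 1] are the argument types.  The loop
   walks the arguments back to front so that tree_cons builds the
   argument TREE_LIST in declaration order, terminated by
   void_list_node.  */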
27044 static tree
27045 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27047 tree type;
27049 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27051 type = ix86_builtin_func_type_tab[(int) tcode];
27052 if (type != NULL)
27053 return type;
27055 if (tcode <= IX86_BT_LAST_FUNC)
27057 unsigned start = ix86_builtin_func_start[(int) tcode];
27058 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27059 tree rtype, atype, args = void_list_node;
27060 unsigned i;
27062 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27063 for (i = after - 1; i > start; --i)
27065 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27066 args = tree_cons (NULL, atype, args);
27069 type = build_function_type (rtype, args);
27071 else
27073 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27074 enum ix86_builtin_func_type icode;
27076 icode = ix86_builtin_func_alias_base[index];
27077 type = ix86_get_builtin_func_type (icode);
27080 ix86_builtin_func_type_tab[(int) tcode] = type;
27081 return type;
27085 /* Codes for all the SSE/MMX builtins. */
27086 enum ix86_builtins
27088 IX86_BUILTIN_ADDPS,
27089 IX86_BUILTIN_ADDSS,
27090 IX86_BUILTIN_DIVPS,
27091 IX86_BUILTIN_DIVSS,
27092 IX86_BUILTIN_MULPS,
27093 IX86_BUILTIN_MULSS,
27094 IX86_BUILTIN_SUBPS,
27095 IX86_BUILTIN_SUBSS,
27097 IX86_BUILTIN_CMPEQPS,
27098 IX86_BUILTIN_CMPLTPS,
27099 IX86_BUILTIN_CMPLEPS,
27100 IX86_BUILTIN_CMPGTPS,
27101 IX86_BUILTIN_CMPGEPS,
27102 IX86_BUILTIN_CMPNEQPS,
27103 IX86_BUILTIN_CMPNLTPS,
27104 IX86_BUILTIN_CMPNLEPS,
27105 IX86_BUILTIN_CMPNGTPS,
27106 IX86_BUILTIN_CMPNGEPS,
27107 IX86_BUILTIN_CMPORDPS,
27108 IX86_BUILTIN_CMPUNORDPS,
27109 IX86_BUILTIN_CMPEQSS,
27110 IX86_BUILTIN_CMPLTSS,
27111 IX86_BUILTIN_CMPLESS,
27112 IX86_BUILTIN_CMPNEQSS,
27113 IX86_BUILTIN_CMPNLTSS,
27114 IX86_BUILTIN_CMPNLESS,
27115 IX86_BUILTIN_CMPORDSS,
27116 IX86_BUILTIN_CMPUNORDSS,
27118 IX86_BUILTIN_COMIEQSS,
27119 IX86_BUILTIN_COMILTSS,
27120 IX86_BUILTIN_COMILESS,
27121 IX86_BUILTIN_COMIGTSS,
27122 IX86_BUILTIN_COMIGESS,
27123 IX86_BUILTIN_COMINEQSS,
27124 IX86_BUILTIN_UCOMIEQSS,
27125 IX86_BUILTIN_UCOMILTSS,
27126 IX86_BUILTIN_UCOMILESS,
27127 IX86_BUILTIN_UCOMIGTSS,
27128 IX86_BUILTIN_UCOMIGESS,
27129 IX86_BUILTIN_UCOMINEQSS,
27131 IX86_BUILTIN_CVTPI2PS,
27132 IX86_BUILTIN_CVTPS2PI,
27133 IX86_BUILTIN_CVTSI2SS,
27134 IX86_BUILTIN_CVTSI642SS,
27135 IX86_BUILTIN_CVTSS2SI,
27136 IX86_BUILTIN_CVTSS2SI64,
27137 IX86_BUILTIN_CVTTPS2PI,
27138 IX86_BUILTIN_CVTTSS2SI,
27139 IX86_BUILTIN_CVTTSS2SI64,
27141 IX86_BUILTIN_MAXPS,
27142 IX86_BUILTIN_MAXSS,
27143 IX86_BUILTIN_MINPS,
27144 IX86_BUILTIN_MINSS,
27146 IX86_BUILTIN_LOADUPS,
27147 IX86_BUILTIN_STOREUPS,
27148 IX86_BUILTIN_MOVSS,
27150 IX86_BUILTIN_MOVHLPS,
27151 IX86_BUILTIN_MOVLHPS,
27152 IX86_BUILTIN_LOADHPS,
27153 IX86_BUILTIN_LOADLPS,
27154 IX86_BUILTIN_STOREHPS,
27155 IX86_BUILTIN_STORELPS,
27157 IX86_BUILTIN_MASKMOVQ,
27158 IX86_BUILTIN_MOVMSKPS,
27159 IX86_BUILTIN_PMOVMSKB,
27161 IX86_BUILTIN_MOVNTPS,
27162 IX86_BUILTIN_MOVNTQ,
27164 IX86_BUILTIN_LOADDQU,
27165 IX86_BUILTIN_STOREDQU,
27167 IX86_BUILTIN_PACKSSWB,
27168 IX86_BUILTIN_PACKSSDW,
27169 IX86_BUILTIN_PACKUSWB,
27171 IX86_BUILTIN_PADDB,
27172 IX86_BUILTIN_PADDW,
27173 IX86_BUILTIN_PADDD,
27174 IX86_BUILTIN_PADDQ,
27175 IX86_BUILTIN_PADDSB,
27176 IX86_BUILTIN_PADDSW,
27177 IX86_BUILTIN_PADDUSB,
27178 IX86_BUILTIN_PADDUSW,
27179 IX86_BUILTIN_PSUBB,
27180 IX86_BUILTIN_PSUBW,
27181 IX86_BUILTIN_PSUBD,
27182 IX86_BUILTIN_PSUBQ,
27183 IX86_BUILTIN_PSUBSB,
27184 IX86_BUILTIN_PSUBSW,
27185 IX86_BUILTIN_PSUBUSB,
27186 IX86_BUILTIN_PSUBUSW,
27188 IX86_BUILTIN_PAND,
27189 IX86_BUILTIN_PANDN,
27190 IX86_BUILTIN_POR,
27191 IX86_BUILTIN_PXOR,
27193 IX86_BUILTIN_PAVGB,
27194 IX86_BUILTIN_PAVGW,
27196 IX86_BUILTIN_PCMPEQB,
27197 IX86_BUILTIN_PCMPEQW,
27198 IX86_BUILTIN_PCMPEQD,
27199 IX86_BUILTIN_PCMPGTB,
27200 IX86_BUILTIN_PCMPGTW,
27201 IX86_BUILTIN_PCMPGTD,
27203 IX86_BUILTIN_PMADDWD,
27205 IX86_BUILTIN_PMAXSW,
27206 IX86_BUILTIN_PMAXUB,
27207 IX86_BUILTIN_PMINSW,
27208 IX86_BUILTIN_PMINUB,
27210 IX86_BUILTIN_PMULHUW,
27211 IX86_BUILTIN_PMULHW,
27212 IX86_BUILTIN_PMULLW,
27214 IX86_BUILTIN_PSADBW,
27215 IX86_BUILTIN_PSHUFW,
27217 IX86_BUILTIN_PSLLW,
27218 IX86_BUILTIN_PSLLD,
27219 IX86_BUILTIN_PSLLQ,
27220 IX86_BUILTIN_PSRAW,
27221 IX86_BUILTIN_PSRAD,
27222 IX86_BUILTIN_PSRLW,
27223 IX86_BUILTIN_PSRLD,
27224 IX86_BUILTIN_PSRLQ,
27225 IX86_BUILTIN_PSLLWI,
27226 IX86_BUILTIN_PSLLDI,
27227 IX86_BUILTIN_PSLLQI,
27228 IX86_BUILTIN_PSRAWI,
27229 IX86_BUILTIN_PSRADI,
27230 IX86_BUILTIN_PSRLWI,
27231 IX86_BUILTIN_PSRLDI,
27232 IX86_BUILTIN_PSRLQI,
27234 IX86_BUILTIN_PUNPCKHBW,
27235 IX86_BUILTIN_PUNPCKHWD,
27236 IX86_BUILTIN_PUNPCKHDQ,
27237 IX86_BUILTIN_PUNPCKLBW,
27238 IX86_BUILTIN_PUNPCKLWD,
27239 IX86_BUILTIN_PUNPCKLDQ,
27241 IX86_BUILTIN_SHUFPS,
27243 IX86_BUILTIN_RCPPS,
27244 IX86_BUILTIN_RCPSS,
27245 IX86_BUILTIN_RSQRTPS,
27246 IX86_BUILTIN_RSQRTPS_NR,
27247 IX86_BUILTIN_RSQRTSS,
27248 IX86_BUILTIN_RSQRTF,
27249 IX86_BUILTIN_SQRTPS,
27250 IX86_BUILTIN_SQRTPS_NR,
27251 IX86_BUILTIN_SQRTSS,
27253 IX86_BUILTIN_UNPCKHPS,
27254 IX86_BUILTIN_UNPCKLPS,
27256 IX86_BUILTIN_ANDPS,
27257 IX86_BUILTIN_ANDNPS,
27258 IX86_BUILTIN_ORPS,
27259 IX86_BUILTIN_XORPS,
27261 IX86_BUILTIN_EMMS,
27262 IX86_BUILTIN_LDMXCSR,
27263 IX86_BUILTIN_STMXCSR,
27264 IX86_BUILTIN_SFENCE,
27266 IX86_BUILTIN_FXSAVE,
27267 IX86_BUILTIN_FXRSTOR,
27268 IX86_BUILTIN_FXSAVE64,
27269 IX86_BUILTIN_FXRSTOR64,
27271 IX86_BUILTIN_XSAVE,
27272 IX86_BUILTIN_XRSTOR,
27273 IX86_BUILTIN_XSAVE64,
27274 IX86_BUILTIN_XRSTOR64,
27276 IX86_BUILTIN_XSAVEOPT,
27277 IX86_BUILTIN_XSAVEOPT64,
27279 /* 3DNow! Original */
27280 IX86_BUILTIN_FEMMS,
27281 IX86_BUILTIN_PAVGUSB,
27282 IX86_BUILTIN_PF2ID,
27283 IX86_BUILTIN_PFACC,
27284 IX86_BUILTIN_PFADD,
27285 IX86_BUILTIN_PFCMPEQ,
27286 IX86_BUILTIN_PFCMPGE,
27287 IX86_BUILTIN_PFCMPGT,
27288 IX86_BUILTIN_PFMAX,
27289 IX86_BUILTIN_PFMIN,
27290 IX86_BUILTIN_PFMUL,
27291 IX86_BUILTIN_PFRCP,
27292 IX86_BUILTIN_PFRCPIT1,
27293 IX86_BUILTIN_PFRCPIT2,
27294 IX86_BUILTIN_PFRSQIT1,
27295 IX86_BUILTIN_PFRSQRT,
27296 IX86_BUILTIN_PFSUB,
27297 IX86_BUILTIN_PFSUBR,
27298 IX86_BUILTIN_PI2FD,
27299 IX86_BUILTIN_PMULHRW,
27301 /* 3DNow! Athlon Extensions */
27302 IX86_BUILTIN_PF2IW,
27303 IX86_BUILTIN_PFNACC,
27304 IX86_BUILTIN_PFPNACC,
27305 IX86_BUILTIN_PI2FW,
27306 IX86_BUILTIN_PSWAPDSI,
27307 IX86_BUILTIN_PSWAPDSF,
27309 /* SSE2 */
27310 IX86_BUILTIN_ADDPD,
27311 IX86_BUILTIN_ADDSD,
27312 IX86_BUILTIN_DIVPD,
27313 IX86_BUILTIN_DIVSD,
27314 IX86_BUILTIN_MULPD,
27315 IX86_BUILTIN_MULSD,
27316 IX86_BUILTIN_SUBPD,
27317 IX86_BUILTIN_SUBSD,
27319 IX86_BUILTIN_CMPEQPD,
27320 IX86_BUILTIN_CMPLTPD,
27321 IX86_BUILTIN_CMPLEPD,
27322 IX86_BUILTIN_CMPGTPD,
27323 IX86_BUILTIN_CMPGEPD,
27324 IX86_BUILTIN_CMPNEQPD,
27325 IX86_BUILTIN_CMPNLTPD,
27326 IX86_BUILTIN_CMPNLEPD,
27327 IX86_BUILTIN_CMPNGTPD,
27328 IX86_BUILTIN_CMPNGEPD,
27329 IX86_BUILTIN_CMPORDPD,
27330 IX86_BUILTIN_CMPUNORDPD,
27331 IX86_BUILTIN_CMPEQSD,
27332 IX86_BUILTIN_CMPLTSD,
27333 IX86_BUILTIN_CMPLESD,
27334 IX86_BUILTIN_CMPNEQSD,
27335 IX86_BUILTIN_CMPNLTSD,
27336 IX86_BUILTIN_CMPNLESD,
27337 IX86_BUILTIN_CMPORDSD,
27338 IX86_BUILTIN_CMPUNORDSD,
27340 IX86_BUILTIN_COMIEQSD,
27341 IX86_BUILTIN_COMILTSD,
27342 IX86_BUILTIN_COMILESD,
27343 IX86_BUILTIN_COMIGTSD,
27344 IX86_BUILTIN_COMIGESD,
27345 IX86_BUILTIN_COMINEQSD,
27346 IX86_BUILTIN_UCOMIEQSD,
27347 IX86_BUILTIN_UCOMILTSD,
27348 IX86_BUILTIN_UCOMILESD,
27349 IX86_BUILTIN_UCOMIGTSD,
27350 IX86_BUILTIN_UCOMIGESD,
27351 IX86_BUILTIN_UCOMINEQSD,
27353 IX86_BUILTIN_MAXPD,
27354 IX86_BUILTIN_MAXSD,
27355 IX86_BUILTIN_MINPD,
27356 IX86_BUILTIN_MINSD,
27358 IX86_BUILTIN_ANDPD,
27359 IX86_BUILTIN_ANDNPD,
27360 IX86_BUILTIN_ORPD,
27361 IX86_BUILTIN_XORPD,
27363 IX86_BUILTIN_SQRTPD,
27364 IX86_BUILTIN_SQRTSD,
27366 IX86_BUILTIN_UNPCKHPD,
27367 IX86_BUILTIN_UNPCKLPD,
27369 IX86_BUILTIN_SHUFPD,
27371 IX86_BUILTIN_LOADUPD,
27372 IX86_BUILTIN_STOREUPD,
27373 IX86_BUILTIN_MOVSD,
27375 IX86_BUILTIN_LOADHPD,
27376 IX86_BUILTIN_LOADLPD,
27378 IX86_BUILTIN_CVTDQ2PD,
27379 IX86_BUILTIN_CVTDQ2PS,
27381 IX86_BUILTIN_CVTPD2DQ,
27382 IX86_BUILTIN_CVTPD2PI,
27383 IX86_BUILTIN_CVTPD2PS,
27384 IX86_BUILTIN_CVTTPD2DQ,
27385 IX86_BUILTIN_CVTTPD2PI,
27387 IX86_BUILTIN_CVTPI2PD,
27388 IX86_BUILTIN_CVTSI2SD,
27389 IX86_BUILTIN_CVTSI642SD,
27391 IX86_BUILTIN_CVTSD2SI,
27392 IX86_BUILTIN_CVTSD2SI64,
27393 IX86_BUILTIN_CVTSD2SS,
27394 IX86_BUILTIN_CVTSS2SD,
27395 IX86_BUILTIN_CVTTSD2SI,
27396 IX86_BUILTIN_CVTTSD2SI64,
27398 IX86_BUILTIN_CVTPS2DQ,
27399 IX86_BUILTIN_CVTPS2PD,
27400 IX86_BUILTIN_CVTTPS2DQ,
27402 IX86_BUILTIN_MOVNTI,
27403 IX86_BUILTIN_MOVNTI64,
27404 IX86_BUILTIN_MOVNTPD,
27405 IX86_BUILTIN_MOVNTDQ,
27407 IX86_BUILTIN_MOVQ128,
27409 /* SSE2 MMX */
27410 IX86_BUILTIN_MASKMOVDQU,
27411 IX86_BUILTIN_MOVMSKPD,
27412 IX86_BUILTIN_PMOVMSKB128,
27414 IX86_BUILTIN_PACKSSWB128,
27415 IX86_BUILTIN_PACKSSDW128,
27416 IX86_BUILTIN_PACKUSWB128,
27418 IX86_BUILTIN_PADDB128,
27419 IX86_BUILTIN_PADDW128,
27420 IX86_BUILTIN_PADDD128,
27421 IX86_BUILTIN_PADDQ128,
27422 IX86_BUILTIN_PADDSB128,
27423 IX86_BUILTIN_PADDSW128,
27424 IX86_BUILTIN_PADDUSB128,
27425 IX86_BUILTIN_PADDUSW128,
27426 IX86_BUILTIN_PSUBB128,
27427 IX86_BUILTIN_PSUBW128,
27428 IX86_BUILTIN_PSUBD128,
27429 IX86_BUILTIN_PSUBQ128,
27430 IX86_BUILTIN_PSUBSB128,
27431 IX86_BUILTIN_PSUBSW128,
27432 IX86_BUILTIN_PSUBUSB128,
27433 IX86_BUILTIN_PSUBUSW128,
27435 IX86_BUILTIN_PAND128,
27436 IX86_BUILTIN_PANDN128,
27437 IX86_BUILTIN_POR128,
27438 IX86_BUILTIN_PXOR128,
27440 IX86_BUILTIN_PAVGB128,
27441 IX86_BUILTIN_PAVGW128,
27443 IX86_BUILTIN_PCMPEQB128,
27444 IX86_BUILTIN_PCMPEQW128,
27445 IX86_BUILTIN_PCMPEQD128,
27446 IX86_BUILTIN_PCMPGTB128,
27447 IX86_BUILTIN_PCMPGTW128,
27448 IX86_BUILTIN_PCMPGTD128,
27450 IX86_BUILTIN_PMADDWD128,
27452 IX86_BUILTIN_PMAXSW128,
27453 IX86_BUILTIN_PMAXUB128,
27454 IX86_BUILTIN_PMINSW128,
27455 IX86_BUILTIN_PMINUB128,
27457 IX86_BUILTIN_PMULUDQ,
27458 IX86_BUILTIN_PMULUDQ128,
27459 IX86_BUILTIN_PMULHUW128,
27460 IX86_BUILTIN_PMULHW128,
27461 IX86_BUILTIN_PMULLW128,
27463 IX86_BUILTIN_PSADBW128,
27464 IX86_BUILTIN_PSHUFHW,
27465 IX86_BUILTIN_PSHUFLW,
27466 IX86_BUILTIN_PSHUFD,
27468 IX86_BUILTIN_PSLLDQI128,
27469 IX86_BUILTIN_PSLLWI128,
27470 IX86_BUILTIN_PSLLDI128,
27471 IX86_BUILTIN_PSLLQI128,
27472 IX86_BUILTIN_PSRAWI128,
27473 IX86_BUILTIN_PSRADI128,
27474 IX86_BUILTIN_PSRLDQI128,
27475 IX86_BUILTIN_PSRLWI128,
27476 IX86_BUILTIN_PSRLDI128,
27477 IX86_BUILTIN_PSRLQI128,
27479 IX86_BUILTIN_PSLLDQ128,
27480 IX86_BUILTIN_PSLLW128,
27481 IX86_BUILTIN_PSLLD128,
27482 IX86_BUILTIN_PSLLQ128,
27483 IX86_BUILTIN_PSRAW128,
27484 IX86_BUILTIN_PSRAD128,
27485 IX86_BUILTIN_PSRLW128,
27486 IX86_BUILTIN_PSRLD128,
27487 IX86_BUILTIN_PSRLQ128,
27489 IX86_BUILTIN_PUNPCKHBW128,
27490 IX86_BUILTIN_PUNPCKHWD128,
27491 IX86_BUILTIN_PUNPCKHDQ128,
27492 IX86_BUILTIN_PUNPCKHQDQ128,
27493 IX86_BUILTIN_PUNPCKLBW128,
27494 IX86_BUILTIN_PUNPCKLWD128,
27495 IX86_BUILTIN_PUNPCKLDQ128,
27496 IX86_BUILTIN_PUNPCKLQDQ128,
27498 IX86_BUILTIN_CLFLUSH,
27499 IX86_BUILTIN_MFENCE,
27500 IX86_BUILTIN_LFENCE,
27501 IX86_BUILTIN_PAUSE,
27503 IX86_BUILTIN_FNSTENV,
27504 IX86_BUILTIN_FLDENV,
27505 IX86_BUILTIN_FNSTSW,
27506 IX86_BUILTIN_FNCLEX,
27508 IX86_BUILTIN_BSRSI,
27509 IX86_BUILTIN_BSRDI,
27510 IX86_BUILTIN_RDPMC,
27511 IX86_BUILTIN_RDTSC,
27512 IX86_BUILTIN_RDTSCP,
27513 IX86_BUILTIN_ROLQI,
27514 IX86_BUILTIN_ROLHI,
27515 IX86_BUILTIN_RORQI,
27516 IX86_BUILTIN_RORHI,
27518 /* SSE3. */
27519 IX86_BUILTIN_ADDSUBPS,
27520 IX86_BUILTIN_HADDPS,
27521 IX86_BUILTIN_HSUBPS,
27522 IX86_BUILTIN_MOVSHDUP,
27523 IX86_BUILTIN_MOVSLDUP,
27524 IX86_BUILTIN_ADDSUBPD,
27525 IX86_BUILTIN_HADDPD,
27526 IX86_BUILTIN_HSUBPD,
27527 IX86_BUILTIN_LDDQU,
27529 IX86_BUILTIN_MONITOR,
27530 IX86_BUILTIN_MWAIT,
27532 /* SSSE3. */
27533 IX86_BUILTIN_PHADDW,
27534 IX86_BUILTIN_PHADDD,
27535 IX86_BUILTIN_PHADDSW,
27536 IX86_BUILTIN_PHSUBW,
27537 IX86_BUILTIN_PHSUBD,
27538 IX86_BUILTIN_PHSUBSW,
27539 IX86_BUILTIN_PMADDUBSW,
27540 IX86_BUILTIN_PMULHRSW,
27541 IX86_BUILTIN_PSHUFB,
27542 IX86_BUILTIN_PSIGNB,
27543 IX86_BUILTIN_PSIGNW,
27544 IX86_BUILTIN_PSIGND,
27545 IX86_BUILTIN_PALIGNR,
27546 IX86_BUILTIN_PABSB,
27547 IX86_BUILTIN_PABSW,
27548 IX86_BUILTIN_PABSD,
27550 IX86_BUILTIN_PHADDW128,
27551 IX86_BUILTIN_PHADDD128,
27552 IX86_BUILTIN_PHADDSW128,
27553 IX86_BUILTIN_PHSUBW128,
27554 IX86_BUILTIN_PHSUBD128,
27555 IX86_BUILTIN_PHSUBSW128,
27556 IX86_BUILTIN_PMADDUBSW128,
27557 IX86_BUILTIN_PMULHRSW128,
27558 IX86_BUILTIN_PSHUFB128,
27559 IX86_BUILTIN_PSIGNB128,
27560 IX86_BUILTIN_PSIGNW128,
27561 IX86_BUILTIN_PSIGND128,
27562 IX86_BUILTIN_PALIGNR128,
27563 IX86_BUILTIN_PABSB128,
27564 IX86_BUILTIN_PABSW128,
27565 IX86_BUILTIN_PABSD128,
27567 /* AMDFAM10 - SSE4A New Instructions. */
27568 IX86_BUILTIN_MOVNTSD,
27569 IX86_BUILTIN_MOVNTSS,
27570 IX86_BUILTIN_EXTRQI,
27571 IX86_BUILTIN_EXTRQ,
27572 IX86_BUILTIN_INSERTQI,
27573 IX86_BUILTIN_INSERTQ,
27575 /* SSE4.1. */
27576 IX86_BUILTIN_BLENDPD,
27577 IX86_BUILTIN_BLENDPS,
27578 IX86_BUILTIN_BLENDVPD,
27579 IX86_BUILTIN_BLENDVPS,
27580 IX86_BUILTIN_PBLENDVB128,
27581 IX86_BUILTIN_PBLENDW128,
27583 IX86_BUILTIN_DPPD,
27584 IX86_BUILTIN_DPPS,
27586 IX86_BUILTIN_INSERTPS128,
27588 IX86_BUILTIN_MOVNTDQA,
27589 IX86_BUILTIN_MPSADBW128,
27590 IX86_BUILTIN_PACKUSDW128,
27591 IX86_BUILTIN_PCMPEQQ,
27592 IX86_BUILTIN_PHMINPOSUW128,
27594 IX86_BUILTIN_PMAXSB128,
27595 IX86_BUILTIN_PMAXSD128,
27596 IX86_BUILTIN_PMAXUD128,
27597 IX86_BUILTIN_PMAXUW128,
27599 IX86_BUILTIN_PMINSB128,
27600 IX86_BUILTIN_PMINSD128,
27601 IX86_BUILTIN_PMINUD128,
27602 IX86_BUILTIN_PMINUW128,
27604 IX86_BUILTIN_PMOVSXBW128,
27605 IX86_BUILTIN_PMOVSXBD128,
27606 IX86_BUILTIN_PMOVSXBQ128,
27607 IX86_BUILTIN_PMOVSXWD128,
27608 IX86_BUILTIN_PMOVSXWQ128,
27609 IX86_BUILTIN_PMOVSXDQ128,
27611 IX86_BUILTIN_PMOVZXBW128,
27612 IX86_BUILTIN_PMOVZXBD128,
27613 IX86_BUILTIN_PMOVZXBQ128,
27614 IX86_BUILTIN_PMOVZXWD128,
27615 IX86_BUILTIN_PMOVZXWQ128,
27616 IX86_BUILTIN_PMOVZXDQ128,
27618 IX86_BUILTIN_PMULDQ128,
27619 IX86_BUILTIN_PMULLD128,
27621 IX86_BUILTIN_ROUNDSD,
27622 IX86_BUILTIN_ROUNDSS,
27624 IX86_BUILTIN_ROUNDPD,
27625 IX86_BUILTIN_ROUNDPS,
27627 IX86_BUILTIN_FLOORPD,
27628 IX86_BUILTIN_CEILPD,
27629 IX86_BUILTIN_TRUNCPD,
27630 IX86_BUILTIN_RINTPD,
27631 IX86_BUILTIN_ROUNDPD_AZ,
27633 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27634 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27635 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27637 IX86_BUILTIN_FLOORPS,
27638 IX86_BUILTIN_CEILPS,
27639 IX86_BUILTIN_TRUNCPS,
27640 IX86_BUILTIN_RINTPS,
27641 IX86_BUILTIN_ROUNDPS_AZ,
27643 IX86_BUILTIN_FLOORPS_SFIX,
27644 IX86_BUILTIN_CEILPS_SFIX,
27645 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27647 IX86_BUILTIN_PTESTZ,
27648 IX86_BUILTIN_PTESTC,
27649 IX86_BUILTIN_PTESTNZC,
27651 IX86_BUILTIN_VEC_INIT_V2SI,
27652 IX86_BUILTIN_VEC_INIT_V4HI,
27653 IX86_BUILTIN_VEC_INIT_V8QI,
27654 IX86_BUILTIN_VEC_EXT_V2DF,
27655 IX86_BUILTIN_VEC_EXT_V2DI,
27656 IX86_BUILTIN_VEC_EXT_V4SF,
27657 IX86_BUILTIN_VEC_EXT_V4SI,
27658 IX86_BUILTIN_VEC_EXT_V8HI,
27659 IX86_BUILTIN_VEC_EXT_V2SI,
27660 IX86_BUILTIN_VEC_EXT_V4HI,
27661 IX86_BUILTIN_VEC_EXT_V16QI,
27662 IX86_BUILTIN_VEC_SET_V2DI,
27663 IX86_BUILTIN_VEC_SET_V4SF,
27664 IX86_BUILTIN_VEC_SET_V4SI,
27665 IX86_BUILTIN_VEC_SET_V8HI,
27666 IX86_BUILTIN_VEC_SET_V4HI,
27667 IX86_BUILTIN_VEC_SET_V16QI,
27669 IX86_BUILTIN_VEC_PACK_SFIX,
27670 IX86_BUILTIN_VEC_PACK_SFIX256,
27672 /* SSE4.2. */
27673 IX86_BUILTIN_CRC32QI,
27674 IX86_BUILTIN_CRC32HI,
27675 IX86_BUILTIN_CRC32SI,
27676 IX86_BUILTIN_CRC32DI,
27678 IX86_BUILTIN_PCMPESTRI128,
27679 IX86_BUILTIN_PCMPESTRM128,
27680 IX86_BUILTIN_PCMPESTRA128,
27681 IX86_BUILTIN_PCMPESTRC128,
27682 IX86_BUILTIN_PCMPESTRO128,
27683 IX86_BUILTIN_PCMPESTRS128,
27684 IX86_BUILTIN_PCMPESTRZ128,
27685 IX86_BUILTIN_PCMPISTRI128,
27686 IX86_BUILTIN_PCMPISTRM128,
27687 IX86_BUILTIN_PCMPISTRA128,
27688 IX86_BUILTIN_PCMPISTRC128,
27689 IX86_BUILTIN_PCMPISTRO128,
27690 IX86_BUILTIN_PCMPISTRS128,
27691 IX86_BUILTIN_PCMPISTRZ128,
27693 IX86_BUILTIN_PCMPGTQ,
27695 /* AES instructions */
27696 IX86_BUILTIN_AESENC128,
27697 IX86_BUILTIN_AESENCLAST128,
27698 IX86_BUILTIN_AESDEC128,
27699 IX86_BUILTIN_AESDECLAST128,
27700 IX86_BUILTIN_AESIMC128,
27701 IX86_BUILTIN_AESKEYGENASSIST128,
27703 /* PCLMUL instruction */
27704 IX86_BUILTIN_PCLMULQDQ128,
27706 /* AVX */
27707 IX86_BUILTIN_ADDPD256,
27708 IX86_BUILTIN_ADDPS256,
27709 IX86_BUILTIN_ADDSUBPD256,
27710 IX86_BUILTIN_ADDSUBPS256,
27711 IX86_BUILTIN_ANDPD256,
27712 IX86_BUILTIN_ANDPS256,
27713 IX86_BUILTIN_ANDNPD256,
27714 IX86_BUILTIN_ANDNPS256,
27715 IX86_BUILTIN_BLENDPD256,
27716 IX86_BUILTIN_BLENDPS256,
27717 IX86_BUILTIN_BLENDVPD256,
27718 IX86_BUILTIN_BLENDVPS256,
27719 IX86_BUILTIN_DIVPD256,
27720 IX86_BUILTIN_DIVPS256,
27721 IX86_BUILTIN_DPPS256,
27722 IX86_BUILTIN_HADDPD256,
27723 IX86_BUILTIN_HADDPS256,
27724 IX86_BUILTIN_HSUBPD256,
27725 IX86_BUILTIN_HSUBPS256,
27726 IX86_BUILTIN_MAXPD256,
27727 IX86_BUILTIN_MAXPS256,
27728 IX86_BUILTIN_MINPD256,
27729 IX86_BUILTIN_MINPS256,
27730 IX86_BUILTIN_MULPD256,
27731 IX86_BUILTIN_MULPS256,
27732 IX86_BUILTIN_ORPD256,
27733 IX86_BUILTIN_ORPS256,
27734 IX86_BUILTIN_SHUFPD256,
27735 IX86_BUILTIN_SHUFPS256,
27736 IX86_BUILTIN_SUBPD256,
27737 IX86_BUILTIN_SUBPS256,
27738 IX86_BUILTIN_XORPD256,
27739 IX86_BUILTIN_XORPS256,
27740 IX86_BUILTIN_CMPSD,
27741 IX86_BUILTIN_CMPSS,
27742 IX86_BUILTIN_CMPPD,
27743 IX86_BUILTIN_CMPPS,
27744 IX86_BUILTIN_CMPPD256,
27745 IX86_BUILTIN_CMPPS256,
27746 IX86_BUILTIN_CVTDQ2PD256,
27747 IX86_BUILTIN_CVTDQ2PS256,
27748 IX86_BUILTIN_CVTPD2PS256,
27749 IX86_BUILTIN_CVTPS2DQ256,
27750 IX86_BUILTIN_CVTPS2PD256,
27751 IX86_BUILTIN_CVTTPD2DQ256,
27752 IX86_BUILTIN_CVTPD2DQ256,
27753 IX86_BUILTIN_CVTTPS2DQ256,
27754 IX86_BUILTIN_EXTRACTF128PD256,
27755 IX86_BUILTIN_EXTRACTF128PS256,
27756 IX86_BUILTIN_EXTRACTF128SI256,
27757 IX86_BUILTIN_VZEROALL,
27758 IX86_BUILTIN_VZEROUPPER,
27759 IX86_BUILTIN_VPERMILVARPD,
27760 IX86_BUILTIN_VPERMILVARPS,
27761 IX86_BUILTIN_VPERMILVARPD256,
27762 IX86_BUILTIN_VPERMILVARPS256,
27763 IX86_BUILTIN_VPERMILPD,
27764 IX86_BUILTIN_VPERMILPS,
27765 IX86_BUILTIN_VPERMILPD256,
27766 IX86_BUILTIN_VPERMILPS256,
27767 IX86_BUILTIN_VPERMIL2PD,
27768 IX86_BUILTIN_VPERMIL2PS,
27769 IX86_BUILTIN_VPERMIL2PD256,
27770 IX86_BUILTIN_VPERMIL2PS256,
27771 IX86_BUILTIN_VPERM2F128PD256,
27772 IX86_BUILTIN_VPERM2F128PS256,
27773 IX86_BUILTIN_VPERM2F128SI256,
27774 IX86_BUILTIN_VBROADCASTSS,
27775 IX86_BUILTIN_VBROADCASTSD256,
27776 IX86_BUILTIN_VBROADCASTSS256,
27777 IX86_BUILTIN_VBROADCASTPD256,
27778 IX86_BUILTIN_VBROADCASTPS256,
27779 IX86_BUILTIN_VINSERTF128PD256,
27780 IX86_BUILTIN_VINSERTF128PS256,
27781 IX86_BUILTIN_VINSERTF128SI256,
27782 IX86_BUILTIN_LOADUPD256,
27783 IX86_BUILTIN_LOADUPS256,
27784 IX86_BUILTIN_STOREUPD256,
27785 IX86_BUILTIN_STOREUPS256,
27786 IX86_BUILTIN_LDDQU256,
27787 IX86_BUILTIN_MOVNTDQ256,
27788 IX86_BUILTIN_MOVNTPD256,
27789 IX86_BUILTIN_MOVNTPS256,
27790 IX86_BUILTIN_LOADDQU256,
27791 IX86_BUILTIN_STOREDQU256,
27792 IX86_BUILTIN_MASKLOADPD,
27793 IX86_BUILTIN_MASKLOADPS,
27794 IX86_BUILTIN_MASKSTOREPD,
27795 IX86_BUILTIN_MASKSTOREPS,
27796 IX86_BUILTIN_MASKLOADPD256,
27797 IX86_BUILTIN_MASKLOADPS256,
27798 IX86_BUILTIN_MASKSTOREPD256,
27799 IX86_BUILTIN_MASKSTOREPS256,
27800 IX86_BUILTIN_MOVSHDUP256,
27801 IX86_BUILTIN_MOVSLDUP256,
27802 IX86_BUILTIN_MOVDDUP256,
27804 IX86_BUILTIN_SQRTPD256,
27805 IX86_BUILTIN_SQRTPS256,
27806 IX86_BUILTIN_SQRTPS_NR256,
27807 IX86_BUILTIN_RSQRTPS256,
27808 IX86_BUILTIN_RSQRTPS_NR256,
27810 IX86_BUILTIN_RCPPS256,
27812 IX86_BUILTIN_ROUNDPD256,
27813 IX86_BUILTIN_ROUNDPS256,
27815 IX86_BUILTIN_FLOORPD256,
27816 IX86_BUILTIN_CEILPD256,
27817 IX86_BUILTIN_TRUNCPD256,
27818 IX86_BUILTIN_RINTPD256,
27819 IX86_BUILTIN_ROUNDPD_AZ256,
27821 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27822 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27823 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27825 IX86_BUILTIN_FLOORPS256,
27826 IX86_BUILTIN_CEILPS256,
27827 IX86_BUILTIN_TRUNCPS256,
27828 IX86_BUILTIN_RINTPS256,
27829 IX86_BUILTIN_ROUNDPS_AZ256,
27831 IX86_BUILTIN_FLOORPS_SFIX256,
27832 IX86_BUILTIN_CEILPS_SFIX256,
27833 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27835 IX86_BUILTIN_UNPCKHPD256,
27836 IX86_BUILTIN_UNPCKLPD256,
27837 IX86_BUILTIN_UNPCKHPS256,
27838 IX86_BUILTIN_UNPCKLPS256,
27840 IX86_BUILTIN_SI256_SI,
27841 IX86_BUILTIN_PS256_PS,
27842 IX86_BUILTIN_PD256_PD,
27843 IX86_BUILTIN_SI_SI256,
27844 IX86_BUILTIN_PS_PS256,
27845 IX86_BUILTIN_PD_PD256,
27847 IX86_BUILTIN_VTESTZPD,
27848 IX86_BUILTIN_VTESTCPD,
27849 IX86_BUILTIN_VTESTNZCPD,
27850 IX86_BUILTIN_VTESTZPS,
27851 IX86_BUILTIN_VTESTCPS,
27852 IX86_BUILTIN_VTESTNZCPS,
27853 IX86_BUILTIN_VTESTZPD256,
27854 IX86_BUILTIN_VTESTCPD256,
27855 IX86_BUILTIN_VTESTNZCPD256,
27856 IX86_BUILTIN_VTESTZPS256,
27857 IX86_BUILTIN_VTESTCPS256,
27858 IX86_BUILTIN_VTESTNZCPS256,
27859 IX86_BUILTIN_PTESTZ256,
27860 IX86_BUILTIN_PTESTC256,
27861 IX86_BUILTIN_PTESTNZC256,
27863 IX86_BUILTIN_MOVMSKPD256,
27864 IX86_BUILTIN_MOVMSKPS256,
27866 /* AVX2 */
27867 IX86_BUILTIN_MPSADBW256,
27868 IX86_BUILTIN_PABSB256,
27869 IX86_BUILTIN_PABSW256,
27870 IX86_BUILTIN_PABSD256,
27871 IX86_BUILTIN_PACKSSDW256,
27872 IX86_BUILTIN_PACKSSWB256,
27873 IX86_BUILTIN_PACKUSDW256,
27874 IX86_BUILTIN_PACKUSWB256,
27875 IX86_BUILTIN_PADDB256,
27876 IX86_BUILTIN_PADDW256,
27877 IX86_BUILTIN_PADDD256,
27878 IX86_BUILTIN_PADDQ256,
27879 IX86_BUILTIN_PADDSB256,
27880 IX86_BUILTIN_PADDSW256,
27881 IX86_BUILTIN_PADDUSB256,
27882 IX86_BUILTIN_PADDUSW256,
27883 IX86_BUILTIN_PALIGNR256,
27884 IX86_BUILTIN_AND256I,
27885 IX86_BUILTIN_ANDNOT256I,
27886 IX86_BUILTIN_PAVGB256,
27887 IX86_BUILTIN_PAVGW256,
27888 IX86_BUILTIN_PBLENDVB256,
27889 IX86_BUILTIN_PBLENDVW256,
27890 IX86_BUILTIN_PCMPEQB256,
27891 IX86_BUILTIN_PCMPEQW256,
27892 IX86_BUILTIN_PCMPEQD256,
27893 IX86_BUILTIN_PCMPEQQ256,
27894 IX86_BUILTIN_PCMPGTB256,
27895 IX86_BUILTIN_PCMPGTW256,
27896 IX86_BUILTIN_PCMPGTD256,
27897 IX86_BUILTIN_PCMPGTQ256,
27898 IX86_BUILTIN_PHADDW256,
27899 IX86_BUILTIN_PHADDD256,
27900 IX86_BUILTIN_PHADDSW256,
27901 IX86_BUILTIN_PHSUBW256,
27902 IX86_BUILTIN_PHSUBD256,
27903 IX86_BUILTIN_PHSUBSW256,
27904 IX86_BUILTIN_PMADDUBSW256,
27905 IX86_BUILTIN_PMADDWD256,
27906 IX86_BUILTIN_PMAXSB256,
27907 IX86_BUILTIN_PMAXSW256,
27908 IX86_BUILTIN_PMAXSD256,
27909 IX86_BUILTIN_PMAXUB256,
27910 IX86_BUILTIN_PMAXUW256,
27911 IX86_BUILTIN_PMAXUD256,
27912 IX86_BUILTIN_PMINSB256,
27913 IX86_BUILTIN_PMINSW256,
27914 IX86_BUILTIN_PMINSD256,
27915 IX86_BUILTIN_PMINUB256,
27916 IX86_BUILTIN_PMINUW256,
27917 IX86_BUILTIN_PMINUD256,
27918 IX86_BUILTIN_PMOVMSKB256,
27919 IX86_BUILTIN_PMOVSXBW256,
27920 IX86_BUILTIN_PMOVSXBD256,
27921 IX86_BUILTIN_PMOVSXBQ256,
27922 IX86_BUILTIN_PMOVSXWD256,
27923 IX86_BUILTIN_PMOVSXWQ256,
27924 IX86_BUILTIN_PMOVSXDQ256,
27925 IX86_BUILTIN_PMOVZXBW256,
27926 IX86_BUILTIN_PMOVZXBD256,
27927 IX86_BUILTIN_PMOVZXBQ256,
27928 IX86_BUILTIN_PMOVZXWD256,
27929 IX86_BUILTIN_PMOVZXWQ256,
27930 IX86_BUILTIN_PMOVZXDQ256,
27931 IX86_BUILTIN_PMULDQ256,
27932 IX86_BUILTIN_PMULHRSW256,
27933 IX86_BUILTIN_PMULHUW256,
27934 IX86_BUILTIN_PMULHW256,
27935 IX86_BUILTIN_PMULLW256,
27936 IX86_BUILTIN_PMULLD256,
27937 IX86_BUILTIN_PMULUDQ256,
27938 IX86_BUILTIN_POR256,
27939 IX86_BUILTIN_PSADBW256,
27940 IX86_BUILTIN_PSHUFB256,
27941 IX86_BUILTIN_PSHUFD256,
27942 IX86_BUILTIN_PSHUFHW256,
27943 IX86_BUILTIN_PSHUFLW256,
27944 IX86_BUILTIN_PSIGNB256,
27945 IX86_BUILTIN_PSIGNW256,
27946 IX86_BUILTIN_PSIGND256,
27947 IX86_BUILTIN_PSLLDQI256,
27948 IX86_BUILTIN_PSLLWI256,
27949 IX86_BUILTIN_PSLLW256,
27950 IX86_BUILTIN_PSLLDI256,
27951 IX86_BUILTIN_PSLLD256,
27952 IX86_BUILTIN_PSLLQI256,
27953 IX86_BUILTIN_PSLLQ256,
27954 IX86_BUILTIN_PSRAWI256,
27955 IX86_BUILTIN_PSRAW256,
27956 IX86_BUILTIN_PSRADI256,
27957 IX86_BUILTIN_PSRAD256,
27958 IX86_BUILTIN_PSRLDQI256,
27959 IX86_BUILTIN_PSRLWI256,
27960 IX86_BUILTIN_PSRLW256,
27961 IX86_BUILTIN_PSRLDI256,
27962 IX86_BUILTIN_PSRLD256,
27963 IX86_BUILTIN_PSRLQI256,
27964 IX86_BUILTIN_PSRLQ256,
27965 IX86_BUILTIN_PSUBB256,
27966 IX86_BUILTIN_PSUBW256,
27967 IX86_BUILTIN_PSUBD256,
27968 IX86_BUILTIN_PSUBQ256,
27969 IX86_BUILTIN_PSUBSB256,
27970 IX86_BUILTIN_PSUBSW256,
27971 IX86_BUILTIN_PSUBUSB256,
27972 IX86_BUILTIN_PSUBUSW256,
27973 IX86_BUILTIN_PUNPCKHBW256,
27974 IX86_BUILTIN_PUNPCKHWD256,
27975 IX86_BUILTIN_PUNPCKHDQ256,
27976 IX86_BUILTIN_PUNPCKHQDQ256,
27977 IX86_BUILTIN_PUNPCKLBW256,
27978 IX86_BUILTIN_PUNPCKLWD256,
27979 IX86_BUILTIN_PUNPCKLDQ256,
27980 IX86_BUILTIN_PUNPCKLQDQ256,
27981 IX86_BUILTIN_PXOR256,
27982 IX86_BUILTIN_MOVNTDQA256,
27983 IX86_BUILTIN_VBROADCASTSS_PS,
27984 IX86_BUILTIN_VBROADCASTSS_PS256,
27985 IX86_BUILTIN_VBROADCASTSD_PD256,
27986 IX86_BUILTIN_VBROADCASTSI256,
27987 IX86_BUILTIN_PBLENDD256,
27988 IX86_BUILTIN_PBLENDD128,
27989 IX86_BUILTIN_PBROADCASTB256,
27990 IX86_BUILTIN_PBROADCASTW256,
27991 IX86_BUILTIN_PBROADCASTD256,
27992 IX86_BUILTIN_PBROADCASTQ256,
27993 IX86_BUILTIN_PBROADCASTB128,
27994 IX86_BUILTIN_PBROADCASTW128,
27995 IX86_BUILTIN_PBROADCASTD128,
27996 IX86_BUILTIN_PBROADCASTQ128,
27997 IX86_BUILTIN_VPERMVARSI256,
27998 IX86_BUILTIN_VPERMDF256,
27999 IX86_BUILTIN_VPERMVARSF256,
28000 IX86_BUILTIN_VPERMDI256,
28001 IX86_BUILTIN_VPERMTI256,
28002 IX86_BUILTIN_VEXTRACT128I256,
28003 IX86_BUILTIN_VINSERT128I256,
28004 IX86_BUILTIN_MASKLOADD,
28005 IX86_BUILTIN_MASKLOADQ,
28006 IX86_BUILTIN_MASKLOADD256,
28007 IX86_BUILTIN_MASKLOADQ256,
28008 IX86_BUILTIN_MASKSTORED,
28009 IX86_BUILTIN_MASKSTOREQ,
28010 IX86_BUILTIN_MASKSTORED256,
28011 IX86_BUILTIN_MASKSTOREQ256,
28012 IX86_BUILTIN_PSLLVV4DI,
28013 IX86_BUILTIN_PSLLVV2DI,
28014 IX86_BUILTIN_PSLLVV8SI,
28015 IX86_BUILTIN_PSLLVV4SI,
28016 IX86_BUILTIN_PSRAVV8SI,
28017 IX86_BUILTIN_PSRAVV4SI,
28018 IX86_BUILTIN_PSRLVV4DI,
28019 IX86_BUILTIN_PSRLVV2DI,
28020 IX86_BUILTIN_PSRLVV8SI,
28021 IX86_BUILTIN_PSRLVV4SI,
28023 IX86_BUILTIN_GATHERSIV2DF,
28024 IX86_BUILTIN_GATHERSIV4DF,
28025 IX86_BUILTIN_GATHERDIV2DF,
28026 IX86_BUILTIN_GATHERDIV4DF,
28027 IX86_BUILTIN_GATHERSIV4SF,
28028 IX86_BUILTIN_GATHERSIV8SF,
28029 IX86_BUILTIN_GATHERDIV4SF,
28030 IX86_BUILTIN_GATHERDIV8SF,
28031 IX86_BUILTIN_GATHERSIV2DI,
28032 IX86_BUILTIN_GATHERSIV4DI,
28033 IX86_BUILTIN_GATHERDIV2DI,
28034 IX86_BUILTIN_GATHERDIV4DI,
28035 IX86_BUILTIN_GATHERSIV4SI,
28036 IX86_BUILTIN_GATHERSIV8SI,
28037 IX86_BUILTIN_GATHERDIV4SI,
28038 IX86_BUILTIN_GATHERDIV8SI,
28040 /* AVX512F */
28041 IX86_BUILTIN_ADDPD512,
28042 IX86_BUILTIN_ADDPS512,
28043 IX86_BUILTIN_ADDSD_ROUND,
28044 IX86_BUILTIN_ADDSS_ROUND,
28045 IX86_BUILTIN_ALIGND512,
28046 IX86_BUILTIN_ALIGNQ512,
28047 IX86_BUILTIN_BLENDMD512,
28048 IX86_BUILTIN_BLENDMPD512,
28049 IX86_BUILTIN_BLENDMPS512,
28050 IX86_BUILTIN_BLENDMQ512,
28051 IX86_BUILTIN_BROADCASTF32X4_512,
28052 IX86_BUILTIN_BROADCASTF64X4_512,
28053 IX86_BUILTIN_BROADCASTI32X4_512,
28054 IX86_BUILTIN_BROADCASTI64X4_512,
28055 IX86_BUILTIN_BROADCASTSD512,
28056 IX86_BUILTIN_BROADCASTSS512,
28057 IX86_BUILTIN_CMPD512,
28058 IX86_BUILTIN_CMPPD512,
28059 IX86_BUILTIN_CMPPS512,
28060 IX86_BUILTIN_CMPQ512,
28061 IX86_BUILTIN_CMPSD_MASK,
28062 IX86_BUILTIN_CMPSS_MASK,
28063 IX86_BUILTIN_COMIDF,
28064 IX86_BUILTIN_COMISF,
28065 IX86_BUILTIN_COMPRESSPD512,
28066 IX86_BUILTIN_COMPRESSPDSTORE512,
28067 IX86_BUILTIN_COMPRESSPS512,
28068 IX86_BUILTIN_COMPRESSPSSTORE512,
28069 IX86_BUILTIN_CVTDQ2PD512,
28070 IX86_BUILTIN_CVTDQ2PS512,
28071 IX86_BUILTIN_CVTPD2DQ512,
28072 IX86_BUILTIN_CVTPD2PS512,
28073 IX86_BUILTIN_CVTPD2UDQ512,
28074 IX86_BUILTIN_CVTPH2PS512,
28075 IX86_BUILTIN_CVTPS2DQ512,
28076 IX86_BUILTIN_CVTPS2PD512,
28077 IX86_BUILTIN_CVTPS2PH512,
28078 IX86_BUILTIN_CVTPS2UDQ512,
28079 IX86_BUILTIN_CVTSD2SS_ROUND,
28080 IX86_BUILTIN_CVTSI2SD64,
28081 IX86_BUILTIN_CVTSI2SS32,
28082 IX86_BUILTIN_CVTSI2SS64,
28083 IX86_BUILTIN_CVTSS2SD_ROUND,
28084 IX86_BUILTIN_CVTTPD2DQ512,
28085 IX86_BUILTIN_CVTTPD2UDQ512,
28086 IX86_BUILTIN_CVTTPS2DQ512,
28087 IX86_BUILTIN_CVTTPS2UDQ512,
28088 IX86_BUILTIN_CVTUDQ2PD512,
28089 IX86_BUILTIN_CVTUDQ2PS512,
28090 IX86_BUILTIN_CVTUSI2SD32,
28091 IX86_BUILTIN_CVTUSI2SD64,
28092 IX86_BUILTIN_CVTUSI2SS32,
28093 IX86_BUILTIN_CVTUSI2SS64,
28094 IX86_BUILTIN_DIVPD512,
28095 IX86_BUILTIN_DIVPS512,
28096 IX86_BUILTIN_DIVSD_ROUND,
28097 IX86_BUILTIN_DIVSS_ROUND,
28098 IX86_BUILTIN_EXPANDPD512,
28099 IX86_BUILTIN_EXPANDPD512Z,
28100 IX86_BUILTIN_EXPANDPDLOAD512,
28101 IX86_BUILTIN_EXPANDPDLOAD512Z,
28102 IX86_BUILTIN_EXPANDPS512,
28103 IX86_BUILTIN_EXPANDPS512Z,
28104 IX86_BUILTIN_EXPANDPSLOAD512,
28105 IX86_BUILTIN_EXPANDPSLOAD512Z,
28106 IX86_BUILTIN_EXTRACTF32X4,
28107 IX86_BUILTIN_EXTRACTF64X4,
28108 IX86_BUILTIN_EXTRACTI32X4,
28109 IX86_BUILTIN_EXTRACTI64X4,
28110 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28111 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28112 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28113 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28114 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28115 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28116 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28117 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28118 IX86_BUILTIN_GETEXPPD512,
28119 IX86_BUILTIN_GETEXPPS512,
28120 IX86_BUILTIN_GETEXPSD128,
28121 IX86_BUILTIN_GETEXPSS128,
28122 IX86_BUILTIN_GETMANTPD512,
28123 IX86_BUILTIN_GETMANTPS512,
28124 IX86_BUILTIN_GETMANTSD128,
28125 IX86_BUILTIN_GETMANTSS128,
28126 IX86_BUILTIN_INSERTF32X4,
28127 IX86_BUILTIN_INSERTF64X4,
28128 IX86_BUILTIN_INSERTI32X4,
28129 IX86_BUILTIN_INSERTI64X4,
28130 IX86_BUILTIN_LOADAPD512,
28131 IX86_BUILTIN_LOADAPS512,
28132 IX86_BUILTIN_LOADDQUDI512,
28133 IX86_BUILTIN_LOADDQUSI512,
28134 IX86_BUILTIN_LOADUPD512,
28135 IX86_BUILTIN_LOADUPS512,
28136 IX86_BUILTIN_MAXPD512,
28137 IX86_BUILTIN_MAXPS512,
28138 IX86_BUILTIN_MAXSD_ROUND,
28139 IX86_BUILTIN_MAXSS_ROUND,
28140 IX86_BUILTIN_MINPD512,
28141 IX86_BUILTIN_MINPS512,
28142 IX86_BUILTIN_MINSD_ROUND,
28143 IX86_BUILTIN_MINSS_ROUND,
28144 IX86_BUILTIN_MOVAPD512,
28145 IX86_BUILTIN_MOVAPS512,
28146 IX86_BUILTIN_MOVDDUP512,
28147 IX86_BUILTIN_MOVDQA32LOAD512,
28148 IX86_BUILTIN_MOVDQA32STORE512,
28149 IX86_BUILTIN_MOVDQA32_512,
28150 IX86_BUILTIN_MOVDQA64LOAD512,
28151 IX86_BUILTIN_MOVDQA64STORE512,
28152 IX86_BUILTIN_MOVDQA64_512,
28153 IX86_BUILTIN_MOVNTDQ512,
28154 IX86_BUILTIN_MOVNTDQA512,
28155 IX86_BUILTIN_MOVNTPD512,
28156 IX86_BUILTIN_MOVNTPS512,
28157 IX86_BUILTIN_MOVSHDUP512,
28158 IX86_BUILTIN_MOVSLDUP512,
28159 IX86_BUILTIN_MULPD512,
28160 IX86_BUILTIN_MULPS512,
28161 IX86_BUILTIN_MULSD_ROUND,
28162 IX86_BUILTIN_MULSS_ROUND,
28163 IX86_BUILTIN_PABSD512,
28164 IX86_BUILTIN_PABSQ512,
28165 IX86_BUILTIN_PADDD512,
28166 IX86_BUILTIN_PADDQ512,
28167 IX86_BUILTIN_PANDD512,
28168 IX86_BUILTIN_PANDND512,
28169 IX86_BUILTIN_PANDNQ512,
28170 IX86_BUILTIN_PANDQ512,
28171 IX86_BUILTIN_PBROADCASTD512,
28172 IX86_BUILTIN_PBROADCASTD512_GPR,
28173 IX86_BUILTIN_PBROADCASTMB512,
28174 IX86_BUILTIN_PBROADCASTMW512,
28175 IX86_BUILTIN_PBROADCASTQ512,
28176 IX86_BUILTIN_PBROADCASTQ512_GPR,
28177 IX86_BUILTIN_PBROADCASTQ512_MEM,
28178 IX86_BUILTIN_PCMPEQD512_MASK,
28179 IX86_BUILTIN_PCMPEQQ512_MASK,
28180 IX86_BUILTIN_PCMPGTD512_MASK,
28181 IX86_BUILTIN_PCMPGTQ512_MASK,
28182 IX86_BUILTIN_PCOMPRESSD512,
28183 IX86_BUILTIN_PCOMPRESSDSTORE512,
28184 IX86_BUILTIN_PCOMPRESSQ512,
28185 IX86_BUILTIN_PCOMPRESSQSTORE512,
28186 IX86_BUILTIN_PEXPANDD512,
28187 IX86_BUILTIN_PEXPANDD512Z,
28188 IX86_BUILTIN_PEXPANDDLOAD512,
28189 IX86_BUILTIN_PEXPANDDLOAD512Z,
28190 IX86_BUILTIN_PEXPANDQ512,
28191 IX86_BUILTIN_PEXPANDQ512Z,
28192 IX86_BUILTIN_PEXPANDQLOAD512,
28193 IX86_BUILTIN_PEXPANDQLOAD512Z,
28194 IX86_BUILTIN_PMAXSD512,
28195 IX86_BUILTIN_PMAXSQ512,
28196 IX86_BUILTIN_PMAXUD512,
28197 IX86_BUILTIN_PMAXUQ512,
28198 IX86_BUILTIN_PMINSD512,
28199 IX86_BUILTIN_PMINSQ512,
28200 IX86_BUILTIN_PMINUD512,
28201 IX86_BUILTIN_PMINUQ512,
28202 IX86_BUILTIN_PMOVDB512,
28203 IX86_BUILTIN_PMOVDB512_MEM,
28204 IX86_BUILTIN_PMOVDW512,
28205 IX86_BUILTIN_PMOVDW512_MEM,
28206 IX86_BUILTIN_PMOVQB512,
28207 IX86_BUILTIN_PMOVQB512_MEM,
28208 IX86_BUILTIN_PMOVQD512,
28209 IX86_BUILTIN_PMOVQD512_MEM,
28210 IX86_BUILTIN_PMOVQW512,
28211 IX86_BUILTIN_PMOVQW512_MEM,
28212 IX86_BUILTIN_PMOVSDB512,
28213 IX86_BUILTIN_PMOVSDB512_MEM,
28214 IX86_BUILTIN_PMOVSDW512,
28215 IX86_BUILTIN_PMOVSDW512_MEM,
28216 IX86_BUILTIN_PMOVSQB512,
28217 IX86_BUILTIN_PMOVSQB512_MEM,
28218 IX86_BUILTIN_PMOVSQD512,
28219 IX86_BUILTIN_PMOVSQD512_MEM,
28220 IX86_BUILTIN_PMOVSQW512,
28221 IX86_BUILTIN_PMOVSQW512_MEM,
28222 IX86_BUILTIN_PMOVSXBD512,
28223 IX86_BUILTIN_PMOVSXBQ512,
28224 IX86_BUILTIN_PMOVSXDQ512,
28225 IX86_BUILTIN_PMOVSXWD512,
28226 IX86_BUILTIN_PMOVSXWQ512,
28227 IX86_BUILTIN_PMOVUSDB512,
28228 IX86_BUILTIN_PMOVUSDB512_MEM,
28229 IX86_BUILTIN_PMOVUSDW512,
28230 IX86_BUILTIN_PMOVUSDW512_MEM,
28231 IX86_BUILTIN_PMOVUSQB512,
28232 IX86_BUILTIN_PMOVUSQB512_MEM,
28233 IX86_BUILTIN_PMOVUSQD512,
28234 IX86_BUILTIN_PMOVUSQD512_MEM,
28235 IX86_BUILTIN_PMOVUSQW512,
28236 IX86_BUILTIN_PMOVUSQW512_MEM,
28237 IX86_BUILTIN_PMOVZXBD512,
28238 IX86_BUILTIN_PMOVZXBQ512,
28239 IX86_BUILTIN_PMOVZXDQ512,
28240 IX86_BUILTIN_PMOVZXWD512,
28241 IX86_BUILTIN_PMOVZXWQ512,
28242 IX86_BUILTIN_PMULDQ512,
28243 IX86_BUILTIN_PMULLD512,
28244 IX86_BUILTIN_PMULUDQ512,
28245 IX86_BUILTIN_PORD512,
28246 IX86_BUILTIN_PORQ512,
28247 IX86_BUILTIN_PROLD512,
28248 IX86_BUILTIN_PROLQ512,
28249 IX86_BUILTIN_PROLVD512,
28250 IX86_BUILTIN_PROLVQ512,
28251 IX86_BUILTIN_PRORD512,
28252 IX86_BUILTIN_PRORQ512,
28253 IX86_BUILTIN_PRORVD512,
28254 IX86_BUILTIN_PRORVQ512,
28255 IX86_BUILTIN_PSHUFD512,
28256 IX86_BUILTIN_PSLLD512,
28257 IX86_BUILTIN_PSLLDI512,
28258 IX86_BUILTIN_PSLLQ512,
28259 IX86_BUILTIN_PSLLQI512,
28260 IX86_BUILTIN_PSLLVV16SI,
28261 IX86_BUILTIN_PSLLVV8DI,
28262 IX86_BUILTIN_PSRAD512,
28263 IX86_BUILTIN_PSRADI512,
28264 IX86_BUILTIN_PSRAQ512,
28265 IX86_BUILTIN_PSRAQI512,
28266 IX86_BUILTIN_PSRAVV16SI,
28267 IX86_BUILTIN_PSRAVV8DI,
28268 IX86_BUILTIN_PSRLD512,
28269 IX86_BUILTIN_PSRLDI512,
28270 IX86_BUILTIN_PSRLQ512,
28271 IX86_BUILTIN_PSRLQI512,
28272 IX86_BUILTIN_PSRLVV16SI,
28273 IX86_BUILTIN_PSRLVV8DI,
28274 IX86_BUILTIN_PSUBD512,
28275 IX86_BUILTIN_PSUBQ512,
28276 IX86_BUILTIN_PTESTMD512,
28277 IX86_BUILTIN_PTESTMQ512,
28278 IX86_BUILTIN_PTESTNMD512,
28279 IX86_BUILTIN_PTESTNMQ512,
28280 IX86_BUILTIN_PUNPCKHDQ512,
28281 IX86_BUILTIN_PUNPCKHQDQ512,
28282 IX86_BUILTIN_PUNPCKLDQ512,
28283 IX86_BUILTIN_PUNPCKLQDQ512,
28284 IX86_BUILTIN_PXORD512,
28285 IX86_BUILTIN_PXORQ512,
28286 IX86_BUILTIN_RCP14PD512,
28287 IX86_BUILTIN_RCP14PS512,
28288 IX86_BUILTIN_RCP14SD,
28289 IX86_BUILTIN_RCP14SS,
28290 IX86_BUILTIN_RNDSCALEPD,
28291 IX86_BUILTIN_RNDSCALEPS,
28292 IX86_BUILTIN_RNDSCALESD,
28293 IX86_BUILTIN_RNDSCALESS,
28294 IX86_BUILTIN_RSQRT14PD512,
28295 IX86_BUILTIN_RSQRT14PS512,
28296 IX86_BUILTIN_RSQRT14SD,
28297 IX86_BUILTIN_RSQRT14SS,
28298 IX86_BUILTIN_SCALEFPD512,
28299 IX86_BUILTIN_SCALEFPS512,
28300 IX86_BUILTIN_SCALEFSD,
28301 IX86_BUILTIN_SCALEFSS,
28302 IX86_BUILTIN_SHUFPD512,
28303 IX86_BUILTIN_SHUFPS512,
28304 IX86_BUILTIN_SHUF_F32x4,
28305 IX86_BUILTIN_SHUF_F64x2,
28306 IX86_BUILTIN_SHUF_I32x4,
28307 IX86_BUILTIN_SHUF_I64x2,
28308 IX86_BUILTIN_SQRTPD512,
28309 IX86_BUILTIN_SQRTPD512_MASK,
28310 IX86_BUILTIN_SQRTPS512_MASK,
28311 IX86_BUILTIN_SQRTPS_NR512,
28312 IX86_BUILTIN_SQRTSD_ROUND,
28313 IX86_BUILTIN_SQRTSS_ROUND,
28314 IX86_BUILTIN_STOREAPD512,
28315 IX86_BUILTIN_STOREAPS512,
28316 IX86_BUILTIN_STOREDQUDI512,
28317 IX86_BUILTIN_STOREDQUSI512,
28318 IX86_BUILTIN_STOREUPD512,
28319 IX86_BUILTIN_STOREUPS512,
28320 IX86_BUILTIN_SUBPD512,
28321 IX86_BUILTIN_SUBPS512,
28322 IX86_BUILTIN_SUBSD_ROUND,
28323 IX86_BUILTIN_SUBSS_ROUND,
28324 IX86_BUILTIN_UCMPD512,
28325 IX86_BUILTIN_UCMPQ512,
28326 IX86_BUILTIN_UNPCKHPD512,
28327 IX86_BUILTIN_UNPCKHPS512,
28328 IX86_BUILTIN_UNPCKLPD512,
28329 IX86_BUILTIN_UNPCKLPS512,
28330 IX86_BUILTIN_VCVTSD2SI32,
28331 IX86_BUILTIN_VCVTSD2SI64,
28332 IX86_BUILTIN_VCVTSD2USI32,
28333 IX86_BUILTIN_VCVTSD2USI64,
28334 IX86_BUILTIN_VCVTSS2SI32,
28335 IX86_BUILTIN_VCVTSS2SI64,
28336 IX86_BUILTIN_VCVTSS2USI32,
28337 IX86_BUILTIN_VCVTSS2USI64,
28338 IX86_BUILTIN_VCVTTSD2SI32,
28339 IX86_BUILTIN_VCVTTSD2SI64,
28340 IX86_BUILTIN_VCVTTSD2USI32,
28341 IX86_BUILTIN_VCVTTSD2USI64,
28342 IX86_BUILTIN_VCVTTSS2SI32,
28343 IX86_BUILTIN_VCVTTSS2SI64,
28344 IX86_BUILTIN_VCVTTSS2USI32,
28345 IX86_BUILTIN_VCVTTSS2USI64,
28346 IX86_BUILTIN_VFMADDPD512_MASK,
28347 IX86_BUILTIN_VFMADDPD512_MASK3,
28348 IX86_BUILTIN_VFMADDPD512_MASKZ,
28349 IX86_BUILTIN_VFMADDPS512_MASK,
28350 IX86_BUILTIN_VFMADDPS512_MASK3,
28351 IX86_BUILTIN_VFMADDPS512_MASKZ,
28352 IX86_BUILTIN_VFMADDSD3_ROUND,
28353 IX86_BUILTIN_VFMADDSS3_ROUND,
28354 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28355 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28356 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28357 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28358 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28359 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28360 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28361 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28362 IX86_BUILTIN_VFMSUBPD512_MASK3,
28363 IX86_BUILTIN_VFMSUBPS512_MASK3,
28364 IX86_BUILTIN_VFMSUBSD3_MASK3,
28365 IX86_BUILTIN_VFMSUBSS3_MASK3,
28366 IX86_BUILTIN_VFNMADDPD512_MASK,
28367 IX86_BUILTIN_VFNMADDPS512_MASK,
28368 IX86_BUILTIN_VFNMSUBPD512_MASK,
28369 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28370 IX86_BUILTIN_VFNMSUBPS512_MASK,
28371 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28372 IX86_BUILTIN_VPCLZCNTD512,
28373 IX86_BUILTIN_VPCLZCNTQ512,
28374 IX86_BUILTIN_VPCONFLICTD512,
28375 IX86_BUILTIN_VPCONFLICTQ512,
28376 IX86_BUILTIN_VPERMDF512,
28377 IX86_BUILTIN_VPERMDI512,
28378 IX86_BUILTIN_VPERMI2VARD512,
28379 IX86_BUILTIN_VPERMI2VARPD512,
28380 IX86_BUILTIN_VPERMI2VARPS512,
28381 IX86_BUILTIN_VPERMI2VARQ512,
28382 IX86_BUILTIN_VPERMILPD512,
28383 IX86_BUILTIN_VPERMILPS512,
28384 IX86_BUILTIN_VPERMILVARPD512,
28385 IX86_BUILTIN_VPERMILVARPS512,
28386 IX86_BUILTIN_VPERMT2VARD512,
28387 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28388 IX86_BUILTIN_VPERMT2VARPD512,
28389 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28390 IX86_BUILTIN_VPERMT2VARPS512,
28391 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28392 IX86_BUILTIN_VPERMT2VARQ512,
28393 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28394 IX86_BUILTIN_VPERMVARDF512,
28395 IX86_BUILTIN_VPERMVARDI512,
28396 IX86_BUILTIN_VPERMVARSF512,
28397 IX86_BUILTIN_VPERMVARSI512,
28398 IX86_BUILTIN_VTERNLOGD512_MASK,
28399 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28400 IX86_BUILTIN_VTERNLOGQ512_MASK,
28401 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28403 /* Mask arithmetic operations */
28404 IX86_BUILTIN_KAND16,
28405 IX86_BUILTIN_KANDN16,
28406 IX86_BUILTIN_KNOT16,
28407 IX86_BUILTIN_KOR16,
28408 IX86_BUILTIN_KORTESTC16,
28409 IX86_BUILTIN_KORTESTZ16,
28410 IX86_BUILTIN_KUNPCKBW,
28411 IX86_BUILTIN_KXNOR16,
28412 IX86_BUILTIN_KXOR16,
28413 IX86_BUILTIN_KMOV16,
28415 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28416 where all operands are 32 or 64 bytes wide, respectively. */
28417 IX86_BUILTIN_GATHERALTSIV4DF,
28418 IX86_BUILTIN_GATHERALTDIV8SF,
28419 IX86_BUILTIN_GATHERALTSIV4DI,
28420 IX86_BUILTIN_GATHERALTDIV8SI,
28421 IX86_BUILTIN_GATHER3ALTDIV16SF,
28422 IX86_BUILTIN_GATHER3ALTDIV16SI,
28423 IX86_BUILTIN_GATHER3ALTSIV8DF,
28424 IX86_BUILTIN_GATHER3ALTSIV8DI,
28425 IX86_BUILTIN_GATHER3DIV16SF,
28426 IX86_BUILTIN_GATHER3DIV16SI,
28427 IX86_BUILTIN_GATHER3DIV8DF,
28428 IX86_BUILTIN_GATHER3DIV8DI,
28429 IX86_BUILTIN_GATHER3SIV16SF,
28430 IX86_BUILTIN_GATHER3SIV16SI,
28431 IX86_BUILTIN_GATHER3SIV8DF,
28432 IX86_BUILTIN_GATHER3SIV8DI,
28433 IX86_BUILTIN_SCATTERDIV16SF,
28434 IX86_BUILTIN_SCATTERDIV16SI,
28435 IX86_BUILTIN_SCATTERDIV8DF,
28436 IX86_BUILTIN_SCATTERDIV8DI,
28437 IX86_BUILTIN_SCATTERSIV16SF,
28438 IX86_BUILTIN_SCATTERSIV16SI,
28439 IX86_BUILTIN_SCATTERSIV8DF,
28440 IX86_BUILTIN_SCATTERSIV8DI,
28442 /* AVX512PF */
28443 IX86_BUILTIN_GATHERPFQPD,
28444 IX86_BUILTIN_GATHERPFDPS,
28445 IX86_BUILTIN_GATHERPFDPD,
28446 IX86_BUILTIN_GATHERPFQPS,
28447 IX86_BUILTIN_SCATTERPFDPD,
28448 IX86_BUILTIN_SCATTERPFDPS,
28449 IX86_BUILTIN_SCATTERPFQPD,
28450 IX86_BUILTIN_SCATTERPFQPS,
28452 /* AVX-512ER */
28453 IX86_BUILTIN_EXP2PD_MASK,
28454 IX86_BUILTIN_EXP2PS_MASK,
28455 IX86_BUILTIN_EXP2PS,
28456 IX86_BUILTIN_RCP28PD,
28457 IX86_BUILTIN_RCP28PS,
28458 IX86_BUILTIN_RCP28SD,
28459 IX86_BUILTIN_RCP28SS,
28460 IX86_BUILTIN_RSQRT28PD,
28461 IX86_BUILTIN_RSQRT28PS,
28462 IX86_BUILTIN_RSQRT28SD,
28463 IX86_BUILTIN_RSQRT28SS,
28465 /* SHA builtins. */
28466 IX86_BUILTIN_SHA1MSG1,
28467 IX86_BUILTIN_SHA1MSG2,
28468 IX86_BUILTIN_SHA1NEXTE,
28469 IX86_BUILTIN_SHA1RNDS4,
28470 IX86_BUILTIN_SHA256MSG1,
28471 IX86_BUILTIN_SHA256MSG2,
28472 IX86_BUILTIN_SHA256RNDS2,
28474 /* TFmode support builtins. */
28475 IX86_BUILTIN_INFQ,
28476 IX86_BUILTIN_HUGE_VALQ,
28477 IX86_BUILTIN_FABSQ,
28478 IX86_BUILTIN_COPYSIGNQ,
28480 /* Vectorizer support builtins. */
28481 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28482 IX86_BUILTIN_CPYSGNPS,
28483 IX86_BUILTIN_CPYSGNPD,
28484 IX86_BUILTIN_CPYSGNPS256,
28485 IX86_BUILTIN_CPYSGNPS512,
28486 IX86_BUILTIN_CPYSGNPD256,
28487 IX86_BUILTIN_CPYSGNPD512,
28488 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28489 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28492 /* FMA4 instructions. */
28493 IX86_BUILTIN_VFMADDSS,
28494 IX86_BUILTIN_VFMADDSD,
28495 IX86_BUILTIN_VFMADDPS,
28496 IX86_BUILTIN_VFMADDPD,
28497 IX86_BUILTIN_VFMADDPS256,
28498 IX86_BUILTIN_VFMADDPD256,
28499 IX86_BUILTIN_VFMADDSUBPS,
28500 IX86_BUILTIN_VFMADDSUBPD,
28501 IX86_BUILTIN_VFMADDSUBPS256,
28502 IX86_BUILTIN_VFMADDSUBPD256,
28504 /* FMA3 instructions. */
28505 IX86_BUILTIN_VFMADDSS3,
28506 IX86_BUILTIN_VFMADDSD3,
28508 /* XOP instructions. */
28509 IX86_BUILTIN_VPCMOV,
28510 IX86_BUILTIN_VPCMOV_V2DI,
28511 IX86_BUILTIN_VPCMOV_V4SI,
28512 IX86_BUILTIN_VPCMOV_V8HI,
28513 IX86_BUILTIN_VPCMOV_V16QI,
28514 IX86_BUILTIN_VPCMOV_V4SF,
28515 IX86_BUILTIN_VPCMOV_V2DF,
28516 IX86_BUILTIN_VPCMOV256,
28517 IX86_BUILTIN_VPCMOV_V4DI256,
28518 IX86_BUILTIN_VPCMOV_V8SI256,
28519 IX86_BUILTIN_VPCMOV_V16HI256,
28520 IX86_BUILTIN_VPCMOV_V32QI256,
28521 IX86_BUILTIN_VPCMOV_V8SF256,
28522 IX86_BUILTIN_VPCMOV_V4DF256,
28524 IX86_BUILTIN_VPPERM,
28526 IX86_BUILTIN_VPMACSSWW,
28527 IX86_BUILTIN_VPMACSWW,
28528 IX86_BUILTIN_VPMACSSWD,
28529 IX86_BUILTIN_VPMACSWD,
28530 IX86_BUILTIN_VPMACSSDD,
28531 IX86_BUILTIN_VPMACSDD,
28532 IX86_BUILTIN_VPMACSSDQL,
28533 IX86_BUILTIN_VPMACSSDQH,
28534 IX86_BUILTIN_VPMACSDQL,
28535 IX86_BUILTIN_VPMACSDQH,
28536 IX86_BUILTIN_VPMADCSSWD,
28537 IX86_BUILTIN_VPMADCSWD,
28539 IX86_BUILTIN_VPHADDBW,
28540 IX86_BUILTIN_VPHADDBD,
28541 IX86_BUILTIN_VPHADDBQ,
28542 IX86_BUILTIN_VPHADDWD,
28543 IX86_BUILTIN_VPHADDWQ,
28544 IX86_BUILTIN_VPHADDDQ,
28545 IX86_BUILTIN_VPHADDUBW,
28546 IX86_BUILTIN_VPHADDUBD,
28547 IX86_BUILTIN_VPHADDUBQ,
28548 IX86_BUILTIN_VPHADDUWD,
28549 IX86_BUILTIN_VPHADDUWQ,
28550 IX86_BUILTIN_VPHADDUDQ,
28551 IX86_BUILTIN_VPHSUBBW,
28552 IX86_BUILTIN_VPHSUBWD,
28553 IX86_BUILTIN_VPHSUBDQ,
28555 IX86_BUILTIN_VPROTB,
28556 IX86_BUILTIN_VPROTW,
28557 IX86_BUILTIN_VPROTD,
28558 IX86_BUILTIN_VPROTQ,
28559 IX86_BUILTIN_VPROTB_IMM,
28560 IX86_BUILTIN_VPROTW_IMM,
28561 IX86_BUILTIN_VPROTD_IMM,
28562 IX86_BUILTIN_VPROTQ_IMM,
28564 IX86_BUILTIN_VPSHLB,
28565 IX86_BUILTIN_VPSHLW,
28566 IX86_BUILTIN_VPSHLD,
28567 IX86_BUILTIN_VPSHLQ,
28568 IX86_BUILTIN_VPSHAB,
28569 IX86_BUILTIN_VPSHAW,
28570 IX86_BUILTIN_VPSHAD,
28571 IX86_BUILTIN_VPSHAQ,
28573 IX86_BUILTIN_VFRCZSS,
28574 IX86_BUILTIN_VFRCZSD,
28575 IX86_BUILTIN_VFRCZPS,
28576 IX86_BUILTIN_VFRCZPD,
28577 IX86_BUILTIN_VFRCZPS256,
28578 IX86_BUILTIN_VFRCZPD256,
28580 IX86_BUILTIN_VPCOMEQUB,
28581 IX86_BUILTIN_VPCOMNEUB,
28582 IX86_BUILTIN_VPCOMLTUB,
28583 IX86_BUILTIN_VPCOMLEUB,
28584 IX86_BUILTIN_VPCOMGTUB,
28585 IX86_BUILTIN_VPCOMGEUB,
28586 IX86_BUILTIN_VPCOMFALSEUB,
28587 IX86_BUILTIN_VPCOMTRUEUB,
28589 IX86_BUILTIN_VPCOMEQUW,
28590 IX86_BUILTIN_VPCOMNEUW,
28591 IX86_BUILTIN_VPCOMLTUW,
28592 IX86_BUILTIN_VPCOMLEUW,
28593 IX86_BUILTIN_VPCOMGTUW,
28594 IX86_BUILTIN_VPCOMGEUW,
28595 IX86_BUILTIN_VPCOMFALSEUW,
28596 IX86_BUILTIN_VPCOMTRUEUW,
28598 IX86_BUILTIN_VPCOMEQUD,
28599 IX86_BUILTIN_VPCOMNEUD,
28600 IX86_BUILTIN_VPCOMLTUD,
28601 IX86_BUILTIN_VPCOMLEUD,
28602 IX86_BUILTIN_VPCOMGTUD,
28603 IX86_BUILTIN_VPCOMGEUD,
28604 IX86_BUILTIN_VPCOMFALSEUD,
28605 IX86_BUILTIN_VPCOMTRUEUD,
28607 IX86_BUILTIN_VPCOMEQUQ,
28608 IX86_BUILTIN_VPCOMNEUQ,
28609 IX86_BUILTIN_VPCOMLTUQ,
28610 IX86_BUILTIN_VPCOMLEUQ,
28611 IX86_BUILTIN_VPCOMGTUQ,
28612 IX86_BUILTIN_VPCOMGEUQ,
28613 IX86_BUILTIN_VPCOMFALSEUQ,
28614 IX86_BUILTIN_VPCOMTRUEUQ,
28616 IX86_BUILTIN_VPCOMEQB,
28617 IX86_BUILTIN_VPCOMNEB,
28618 IX86_BUILTIN_VPCOMLTB,
28619 IX86_BUILTIN_VPCOMLEB,
28620 IX86_BUILTIN_VPCOMGTB,
28621 IX86_BUILTIN_VPCOMGEB,
28622 IX86_BUILTIN_VPCOMFALSEB,
28623 IX86_BUILTIN_VPCOMTRUEB,
28625 IX86_BUILTIN_VPCOMEQW,
28626 IX86_BUILTIN_VPCOMNEW,
28627 IX86_BUILTIN_VPCOMLTW,
28628 IX86_BUILTIN_VPCOMLEW,
28629 IX86_BUILTIN_VPCOMGTW,
28630 IX86_BUILTIN_VPCOMGEW,
28631 IX86_BUILTIN_VPCOMFALSEW,
28632 IX86_BUILTIN_VPCOMTRUEW,
28634 IX86_BUILTIN_VPCOMEQD,
28635 IX86_BUILTIN_VPCOMNED,
28636 IX86_BUILTIN_VPCOMLTD,
28637 IX86_BUILTIN_VPCOMLED,
28638 IX86_BUILTIN_VPCOMGTD,
28639 IX86_BUILTIN_VPCOMGED,
28640 IX86_BUILTIN_VPCOMFALSED,
28641 IX86_BUILTIN_VPCOMTRUED,
28643 IX86_BUILTIN_VPCOMEQQ,
28644 IX86_BUILTIN_VPCOMNEQ,
28645 IX86_BUILTIN_VPCOMLTQ,
28646 IX86_BUILTIN_VPCOMLEQ,
28647 IX86_BUILTIN_VPCOMGTQ,
28648 IX86_BUILTIN_VPCOMGEQ,
28649 IX86_BUILTIN_VPCOMFALSEQ,
28650 IX86_BUILTIN_VPCOMTRUEQ,
28652 /* LWP instructions. */
28653 IX86_BUILTIN_LLWPCB,
28654 IX86_BUILTIN_SLWPCB,
28655 IX86_BUILTIN_LWPVAL32,
28656 IX86_BUILTIN_LWPVAL64,
28657 IX86_BUILTIN_LWPINS32,
28658 IX86_BUILTIN_LWPINS64,
28660 IX86_BUILTIN_CLZS,
28662 /* RTM */
28663 IX86_BUILTIN_XBEGIN,
28664 IX86_BUILTIN_XEND,
28665 IX86_BUILTIN_XABORT,
28666 IX86_BUILTIN_XTEST,
28668 /* BMI instructions. */
28669 IX86_BUILTIN_BEXTR32,
28670 IX86_BUILTIN_BEXTR64,
28671 IX86_BUILTIN_CTZS,
28673 /* TBM instructions. */
28674 IX86_BUILTIN_BEXTRI32,
28675 IX86_BUILTIN_BEXTRI64,
28677 /* BMI2 instructions. */
28678 IX86_BUILTIN_BZHI32,
28679 IX86_BUILTIN_BZHI64,
28680 IX86_BUILTIN_PDEP32,
28681 IX86_BUILTIN_PDEP64,
28682 IX86_BUILTIN_PEXT32,
28683 IX86_BUILTIN_PEXT64,
28685 /* ADX instructions. */
28686 IX86_BUILTIN_ADDCARRYX32,
28687 IX86_BUILTIN_ADDCARRYX64,
28689 /* FSGSBASE instructions. */
28690 IX86_BUILTIN_RDFSBASE32,
28691 IX86_BUILTIN_RDFSBASE64,
28692 IX86_BUILTIN_RDGSBASE32,
28693 IX86_BUILTIN_RDGSBASE64,
28694 IX86_BUILTIN_WRFSBASE32,
28695 IX86_BUILTIN_WRFSBASE64,
28696 IX86_BUILTIN_WRGSBASE32,
28697 IX86_BUILTIN_WRGSBASE64,
28699 /* RDRND instructions. */
28700 IX86_BUILTIN_RDRAND16_STEP,
28701 IX86_BUILTIN_RDRAND32_STEP,
28702 IX86_BUILTIN_RDRAND64_STEP,
28704 /* RDSEED instructions. */
28705 IX86_BUILTIN_RDSEED16_STEP,
28706 IX86_BUILTIN_RDSEED32_STEP,
28707 IX86_BUILTIN_RDSEED64_STEP,
28709 /* F16C instructions. */
28710 IX86_BUILTIN_CVTPH2PS,
28711 IX86_BUILTIN_CVTPH2PS256,
28712 IX86_BUILTIN_CVTPS2PH,
28713 IX86_BUILTIN_CVTPS2PH256,
28715 /* CFString built-in for darwin */
28716 IX86_BUILTIN_CFSTRING,
28718 /* Builtins to get CPU type and supported features. */
28719 IX86_BUILTIN_CPU_INIT,
28720 IX86_BUILTIN_CPU_IS,
28721 IX86_BUILTIN_CPU_SUPPORTS,
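  /* Editorial aside (usage illustration, not part of the enum): these three
     codes back the user-visible __builtin_cpu_init, __builtin_cpu_is and
     __builtin_cpu_supports builtins documented in the GCC manual.  Typical
     use in user code looks like:

	__builtin_cpu_init ();
	if (__builtin_cpu_supports ("avx2"))
	  use_avx2_kernel ();
	else
	  use_generic_kernel ();

     use_avx2_kernel and use_generic_kernel are placeholder names used only
     for this illustration.  */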
28723 /* Read/write FLAGS register built-ins. */
28724 IX86_BUILTIN_READ_FLAGS,
28725 IX86_BUILTIN_WRITE_FLAGS,
28727 IX86_BUILTIN_MAX
28730 /* Table for the ix86 builtin decls. */
28731 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28733 /* Table of all of the builtin functions that are possible with different ISAs
28734 but are waiting to be built until a function is declared to use that
28735 ISA. */
28736 struct builtin_isa {
28737 const char *name; /* function name */
28738 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28739 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28740 bool const_p; /* true if the declaration is constant */
28741 bool set_and_not_built_p; /* true if the builtin was deferred and has not been declared yet */
28744 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28747 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28748 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28749 function decl in the ix86_builtins array. Returns the function decl or
28750 NULL_TREE if the builtin was not added.
28752 If the front end has a special hook for builtin functions, delay adding
28753 builtin functions that aren't in the current ISA until the ISA is changed
28754 with function specific optimization. Doing so can save about 300K for the
28755 default compiler. When the builtin is expanded, check at that time whether
28756 it is valid.
28758 If the front end doesn't have a special hook, record all builtins, even
28759 those that aren't in the current ISA, in case the user uses
28760 function specific options for a different ISA, so that we don't get scope
28761 errors if a builtin is added in the middle of a function scope. */
28763 static inline tree
28764 def_builtin (HOST_WIDE_INT mask, const char *name,
28765 enum ix86_builtin_func_type tcode,
28766 enum ix86_builtins code)
28768 tree decl = NULL_TREE;
28770 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28772 ix86_builtins_isa[(int) code].isa = mask;
28774 mask &= ~OPTION_MASK_ISA_64BIT;
28775 if (mask == 0
28776 || (mask & ix86_isa_flags) != 0
28777 || (lang_hooks.builtin_function
28778 == lang_hooks.builtin_function_ext_scope))
28781 tree type = ix86_get_builtin_func_type (tcode);
28782 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28783 NULL, NULL_TREE);
28784 ix86_builtins[(int) code] = decl;
28785 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28787 else
28789 ix86_builtins[(int) code] = NULL_TREE;
28790 ix86_builtins_isa[(int) code].tcode = tcode;
28791 ix86_builtins_isa[(int) code].name = name;
28792 ix86_builtins_isa[(int) code].const_p = false;
28793 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28797 return decl;
28800 /* Like def_builtin, but also marks the function decl "const". */
28802 static inline tree
28803 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28804 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28806 tree decl = def_builtin (mask, name, tcode, code);
28807 if (decl)
28808 TREE_READONLY (decl) = 1;
28809 else
28810 ix86_builtins_isa[(int) code].const_p = true;
28812 return decl;
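/* Editorial sketch (not part of the original file): how the two helpers
   above are typically used.  During builtin initialization each machine
   builtin is registered with the ISA mask it requires; when that ISA is
   not enabled, def_builtin only records the request in ix86_builtins_isa
   and ix86_add_new_builtins materializes the declaration later.  The
   concrete builtin below is picked purely for illustration:

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_paddd128",
			V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_PADDD128);

   Both the function-type enumerator and the builtin enumerator must
   already exist; def_builtin_const additionally marks the resulting decl
   TREE_READONLY so calls to it can be treated as const.  */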
28815 /* Add any new builtin functions for a given ISA that may not have been
28816 declared. This saves a bit of space compared to eagerly adding all of the
28817 declarations to the tree, including ones that are never used. */
28819 static void
28820 ix86_add_new_builtins (HOST_WIDE_INT isa)
28822 int i;
28824 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28826 if ((ix86_builtins_isa[i].isa & isa) != 0
28827 && ix86_builtins_isa[i].set_and_not_built_p)
28829 tree decl, type;
28831 /* Don't define the builtin again. */
28832 ix86_builtins_isa[i].set_and_not_built_p = false;
28834 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28835 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28836 type, i, BUILT_IN_MD, NULL,
28837 NULL_TREE);
28839 ix86_builtins[i] = decl;
28840 if (ix86_builtins_isa[i].const_p)
28841 TREE_READONLY (decl) = 1;
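/* Editorial note (an assumption about the surrounding code, not taken from
   this excerpt): ix86_add_new_builtins is the consumer of the
   set_and_not_built_p entries recorded by def_builtin.  The expected call
   pattern is that the option-override / target-attribute handling invokes
   it with the newly enabled ISA bits, e.g.

     ix86_add_new_builtins (ix86_isa_flags);

   after switching to a more capable ISA, so that previously deferred
   builtins obtain real declarations (at extern scope, hence
   add_builtin_function_ext_scope) before any use of them is expanded.  */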
28846 /* Bits for builtin_description.flag. */
28848 /* Set when we don't support the comparison natively, and should
28849 swap the comparison operands in order to support it. */
28850 #define BUILTIN_DESC_SWAP_OPERANDS 1
28852 struct builtin_description
28854 const HOST_WIDE_INT mask;
28855 const enum insn_code icode;
28856 const char *const name;
28857 const enum ix86_builtins code;
28858 const enum rtx_code comparison;
28859 const int flag;
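/* Editorial sketch (an illustration, not code copied from this file): the
   "flag" field above is overloaded by the tables that follow.  For most
   entries it carries the ix86_builtin_func_type cast to int (e.g.
   (int) V4SF_FTYPE_V4SF_V4SF), for the pcmpestr/pcmpistr tables it carries
   the CC mode to test (e.g. (int) CCZmode), and for comparison builtins it
   may carry BUILTIN_DESC_SWAP_OPERANDS.  When that bit is set, the expander
   is expected to reverse the comparison instead of using an unsupported
   pattern, roughly:

     enum rtx_code code = d->comparison;
     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       {
	 code = swap_condition (code);
	 rtx tmp = op0; op0 = op1; op1 = tmp;
       }

   where swap_condition turns GT into LT, GE into LE, and so on.  The same
   operand-swapping idea shows up below as the *_SWAP function types, e.g.
   __builtin_ia32_cmpgtps is listed as LT with V4SF_FTYPE_V4SF_V4SF_SWAP.  */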
28862 static const struct builtin_description bdesc_comi[] =
28864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28890 static const struct builtin_description bdesc_pcmpestr[] =
28892 /* SSE4.2 */
28893 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28894 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28895 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28896 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28897 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28898 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28899 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28902 static const struct builtin_description bdesc_pcmpistr[] =
28904 /* SSE4.2 */
28905 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28906 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28907 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28908 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28909 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28910 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28911 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28914 /* Special builtins with variable number of arguments. */
28915 static const struct builtin_description bdesc_special_args[] =
28917 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28918 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28919 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28921 /* 80387 (for use internally for atomic compound assignment). */
28922 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28923 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28924 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28925 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28927 /* MMX */
28928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28930 /* 3DNow! */
28931 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28933 /* FXSR, XSAVE and XSAVEOPT */
28934 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28935 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28936 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28937 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28938 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28940 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28941 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28942 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28943 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28944 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28946 /* SSE */
28947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28956 /* SSE or 3DNow!A */
28957 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28958 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28960 /* SSE2 */
28961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28968 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28975 /* SSE3 */
28976 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28978 /* SSE4.1 */
28979 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28981 /* SSE4A */
28982 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28983 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28985 /* AVX */
28986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28989 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28990 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28991 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
28993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
28995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
28998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
28999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29016 /* AVX2 */
29017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29027 /* AVX512F */
29028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29076 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29077 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29078 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29079 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29080 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29081 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29083 /* FSGSBASE */
29084 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29085 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29086 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29087 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29088 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29089 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29090 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29091 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29093 /* RTM */
29094 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29095 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29096 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29099 /* Builtins with variable number of arguments. */
29100 static const struct builtin_description bdesc_args[] =
29102 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29103 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29104 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29105 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29106 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29107 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29108 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29110 /* MMX */
29111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29174 /* 3DNow! */
29175 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29176 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29177 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29178 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29182 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29196 /* 3DNow!A */
29197 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29198 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29199 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29200 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29201 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29202 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29204 /* SSE */
29205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29207 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29209 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29213 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29216 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29220 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29221 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29222 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

/* SSE MMX or 3Dnow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
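
/* Note on the shift entries below: the *_SI_COUNT signatures take the
   shift count as a scalar integer, while the *_V8HI/V4SI/V2DI_COUNT
   signatures take the count in an XMM operand; pslldqi/psrldqi shift
   the full 128-bit value via the V1TI patterns.  */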
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

/* SSSE3 */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

/* SSSE3. */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

/* SSE4.1 */
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* AES */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29614 /* AVX */
29615 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29616 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29619 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29620 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29623 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29629 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29630 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29631 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29632 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29633 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29634 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29635 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29636 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29637 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29638 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29639 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29640 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
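/* For the vtest/ptest entries the comparison field selects which flag the
   builtin returns: EQ for the ZF-based *testz* forms, LTU for the CF-based
   *testc* forms, GTU for the *testnzc* forms (neither flag set); see
   ix86_expand_sse_ptest.  */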
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
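/* Each entry in this table records the ISA option mask required for the
   builtin, the insn pattern used to expand it, its user-visible name, its
   IX86_BUILTIN_* code, an optional sub-code (a comparison rtx_code or a
   ROUND_* constant, UNKNOWN otherwise) and the prototype it is registered
   with.  */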
29755 /* AVX2 */
29756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29757 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29758 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29759 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29764 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29765 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29766 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29767 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29773 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
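/* In the shift entries below the _COUNT suffix of the prototype marks the
   form of the shift count: the *_SI_COUNT/_INT_COUNT variants take it as a
   scalar, the *_V8HI_COUNT/_V4SI_COUNT/_V2DI_COUNT variants take it in the
   low element of an XMM operand; both forms map to the same shift pattern.
   The _INT_CONVERT prototypes (pslldqi256/psrldqi256) have their vector
   operands reinterpreted in the V2TI mode of the underlying pattern.  */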
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29903 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29905 /* BMI */
29906 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29907 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29908 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29910 /* TBM */
29911 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29912 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29914 /* F16C */
29915 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29916 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29917 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29918 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29920 /* BMI2 */
29921 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29922 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29923 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29924 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29925 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29926 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29928 /* AVX512F */
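/* Most AVX512F builtins below carry a _mask suffix: the trailing vector and
   QI/HI arguments in the prototype are the pass-through source and the write
   mask, so masked-off destination elements are taken from the pass-through
   operand.  The _maskz variants zero those elements instead.  */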
29929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
29978 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
29979 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
29981 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29982 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30090 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30091 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30092 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30093 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30125 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
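/* Illustrative sketch (an assumption, mirroring how GCC's avx512fintrin.h
   normally wraps "_mask" builtins; not itself part of this table): the
   trailing ..._V8DF_QI or ..._V16SF_HI in the FTYPE names above denotes a
   merge destination plus a write mask, e.g. for the shufpd512 entry:

     __m512d r = (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) a,
                                                          (__v8df) b,
                                                          0x2a,
                                                          (__v8df) src,
                                                          (__mmask8) m);

   Elements whose mask bit is clear keep the corresponding element of SRC.  */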
30130 /* Mask arithmetic operations */
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
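/* Illustrative sketch (an assumption about how avx512fintrin.h uses the
   mask-arithmetic entries above; not part of the table): HI_FTYPE_HI_HI
   means both operands and the result are 16-bit mask values kept in HImode,
   so e.g. the kandhi entry backs a wrapper of the form

     __mmask16 m = (__mmask16) __builtin_ia32_kandhi ((__mmask16) a,
                                                      (__mmask16) b);  */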
30142 /* SHA */
30143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
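/* Illustrative sketch (an assumption, patterned on GCC's shaintrin.h
   wrappers; not part of the table): the SHA entries above expand to the
   SHA-NI instructions, e.g.

     __m128i r = (__m128i) __builtin_ia32_sha1msg1 ((__v4si) a, (__v4si) b);

   with sha1rnds4 additionally taking an immediate round-function selector
   (the _INT tail of its FTYPE).  */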
30150 };
30152 /* Builtins with rounding support. */
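/* Illustrative note (an assumption, based on how avx512fintrin.h passes the
   rounding mode; not part of the table): every FTYPE in bdesc_round_args
   ends in _INT because the last operand is the embedded-rounding immediate,
   e.g.

     __m512 r = (__m512) __builtin_ia32_addps512_mask ((__v16sf) a,
                                                       (__v16sf) b,
                                                       (__v16sf) src,
                                                       (__mmask16) m,
                                                       _MM_FROUND_CUR_DIRECTION);

   For the FMA entries, _mask merges masked-off elements from the first
   source, _mask3 from the third, and _maskz zeroes them.  */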
30153 static const struct builtin_description bdesc_round_args[] =
30154 {
30155 /* AVX512F */
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30175 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30177 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30184 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30186 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30236 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30238 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30240 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30242 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30244 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30246 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30248 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30250 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30276 /* AVX512ER */
30277 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30278 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30279 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30280 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30282 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30283 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30284 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30285 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30286 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30287 };
30289 /* FMA4 and XOP. */
30290 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30291 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30292 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30293 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30294 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30295 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30296 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30297 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30298 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30299 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30300 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30301 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30302 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30303 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30304 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30305 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30306 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30307 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30308 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30309 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30310 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30311 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30312 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30313 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30314 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30315 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30316 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30317 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30318 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30319 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30320 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30321 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30322 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30323 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30324 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30325 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30326 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30327 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30328 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30329 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30330 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30331 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30332 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30333 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30334 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30335 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30336 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30337 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30338 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30339 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30340 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30341 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
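/* Illustrative note (not part of the original macros): each MULTI_ARG_*
   name above is shorthand for the ix86_builtin_func_type placed in the last
   field of the bdesc_multi_arg entries below.  For example, MULTI_ARG_3_SF
   stands for V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin of the form
   (sketch, assuming the FMA4 scalar entry below)

     __v4sf __builtin_ia32_vfmaddss (__v4sf, __v4sf, __v4sf);  */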
30343 static const struct builtin_description bdesc_multi_arg[] =
30344 {
30345 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30346 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30347 UNKNOWN, (int)MULTI_ARG_3_SF },
30348 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30349 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30350 UNKNOWN, (int)MULTI_ARG_3_DF },
30352 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30353 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30354 UNKNOWN, (int)MULTI_ARG_3_SF },
30355 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30356 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30357 UNKNOWN, (int)MULTI_ARG_3_DF },
30359 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30360 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30361 UNKNOWN, (int)MULTI_ARG_3_SF },
30362 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30363 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30364 UNKNOWN, (int)MULTI_ARG_3_DF },
30365 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30366 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30367 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30368 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30369 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30370 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30372 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30373 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30374 UNKNOWN, (int)MULTI_ARG_3_SF },
30375 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30376 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30377 UNKNOWN, (int)MULTI_ARG_3_DF },
30378 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30379 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30380 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30381 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30382 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30383 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30542 };
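/* Illustrative note (an assumption about the vpcom* entries above, patterned
   on GCC's xopintrin.h; not part of the table): for those entries the fifth
   field is not UNKNOWN but an rtx comparison code (EQ, NE, LT, LTU, ...) or
   the PCOM_FALSE/PCOM_TRUE pseudo-codes, which the expander folds into the
   immediate comparison selector of the XOP vpcom instruction.  That is also
   why the "neq" spellings reuse the same IX86_BUILTIN_* enum as the "ne"
   ones.  A typical wrapper looks like

     __m128i r = (__m128i) __builtin_ia32_vpcomltub ((__v16qi) a,
                                                     (__v16qi) b);  */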
30545 /* TM vector builtins. */
30547 /* Reuse the existing x86-specific `struct builtin_description' because
30548 we're lazy. Add casts to make them fit. */
30549 static const struct builtin_description bdesc_tm[] =
30551 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30552 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30553 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30554 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30555 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30556 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30557 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30559 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30560 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30561 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30562 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30563 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30564 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30565 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30567 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30568 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30569 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30570 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30571 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30572 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30573 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30575 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30576 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30577 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30580 /* TM callbacks. */
30582 /* Return the builtin decl needed to load a vector of TYPE. */
30584 static tree
30585 ix86_builtin_tm_load (tree type)
30587 if (TREE_CODE (type) == VECTOR_TYPE)
30589 switch (tree_to_uhwi (TYPE_SIZE (type)))
30591 case 64:
30592 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30593 case 128:
30594 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30595 case 256:
30596 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30599 return NULL_TREE;
30602 /* Return the builtin decl needed to store a vector of TYPE. */
30604 static tree
30605 ix86_builtin_tm_store (tree type)
30607 if (TREE_CODE (type) == VECTOR_TYPE)
30609 switch (tree_to_uhwi (TYPE_SIZE (type)))
30611 case 64:
30612 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30613 case 128:
30614 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30615 case 256:
30616 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30619 return NULL_TREE;
30622 /* Initialize the transactional memory vector load/store builtins. */
30624 static void
30625 ix86_init_tm_builtins (void)
30627 enum ix86_builtin_func_type ftype;
30628 const struct builtin_description *d;
30629 size_t i;
30630 tree decl;
30631 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30632 tree attrs_log, attrs_type_log;
30634 if (!flag_tm)
30635 return;
30637 /* If there are no builtins defined, we must be compiling in a
30638 language without trans-mem support. */
30639 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30640 return;
30642 /* Use whatever attributes a normal TM load has. */
30643 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30644 attrs_load = DECL_ATTRIBUTES (decl);
30645 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30646 /* Use whatever attributes a normal TM store has. */
30647 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30648 attrs_store = DECL_ATTRIBUTES (decl);
30649 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30650 /* Use whatever attributes a normal TM log has. */
30651 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30652 attrs_log = DECL_ATTRIBUTES (decl);
30653 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30655 for (i = 0, d = bdesc_tm;
30656 i < ARRAY_SIZE (bdesc_tm);
30657 i++, d++)
30659 if ((d->mask & ix86_isa_flags) != 0
30660 || (lang_hooks.builtin_function
30661 == lang_hooks.builtin_function_ext_scope))
30663 tree type, attrs, attrs_type;
30664 enum built_in_function code = (enum built_in_function) d->code;
30666 ftype = (enum ix86_builtin_func_type) d->flag;
30667 type = ix86_get_builtin_func_type (ftype);
30669 if (BUILTIN_TM_LOAD_P (code))
30671 attrs = attrs_load;
30672 attrs_type = attrs_type_load;
30674 else if (BUILTIN_TM_STORE_P (code))
30676 attrs = attrs_store;
30677 attrs_type = attrs_type_store;
30679 else
30681 attrs = attrs_log;
30682 attrs_type = attrs_type_log;
30684 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30685 /* The builtin without the prefix for
30686 calling it directly. */
30687 d->name + strlen ("__builtin_"),
30688 attrs);
30689 /* add_builtin_function() will set the DECL_ATTRIBUTES; now
30690 set the TYPE_ATTRIBUTES. */
30691 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30693 set_builtin_decl (code, decl, false);
30698 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30699 in the current target ISA, to allow the user to compile particular modules
30700 with target-specific options that differ from the command-line
30701 options. */
30702 static void
30703 ix86_init_mmx_sse_builtins (void)
30705 const struct builtin_description * d;
30706 enum ix86_builtin_func_type ftype;
30707 size_t i;
30709 /* Add all special builtins with variable number of operands. */
30710 for (i = 0, d = bdesc_special_args;
30711 i < ARRAY_SIZE (bdesc_special_args);
30712 i++, d++)
30714 if (d->name == 0)
30715 continue;
30717 ftype = (enum ix86_builtin_func_type) d->flag;
30718 def_builtin (d->mask, d->name, ftype, d->code);
30721 /* Add all builtins with variable number of operands. */
30722 for (i = 0, d = bdesc_args;
30723 i < ARRAY_SIZE (bdesc_args);
30724 i++, d++)
30726 if (d->name == 0)
30727 continue;
30729 ftype = (enum ix86_builtin_func_type) d->flag;
30730 def_builtin_const (d->mask, d->name, ftype, d->code);
30733 /* Add all builtins with rounding. */
30734 for (i = 0, d = bdesc_round_args;
30735 i < ARRAY_SIZE (bdesc_round_args);
30736 i++, d++)
30738 if (d->name == 0)
30739 continue;
30741 ftype = (enum ix86_builtin_func_type) d->flag;
30742 def_builtin_const (d->mask, d->name, ftype, d->code);
30745 /* pcmpestr[im] insns. */
30746 for (i = 0, d = bdesc_pcmpestr;
30747 i < ARRAY_SIZE (bdesc_pcmpestr);
30748 i++, d++)
30750 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30751 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30752 else
30753 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30754 def_builtin_const (d->mask, d->name, ftype, d->code);
30757 /* pcmpistr[im] insns. */
30758 for (i = 0, d = bdesc_pcmpistr;
30759 i < ARRAY_SIZE (bdesc_pcmpistr);
30760 i++, d++)
30762 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30763 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30764 else
30765 ftype = INT_FTYPE_V16QI_V16QI_INT;
30766 def_builtin_const (d->mask, d->name, ftype, d->code);
30769 /* comi/ucomi insns. */
30770 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30772 if (d->mask == OPTION_MASK_ISA_SSE2)
30773 ftype = INT_FTYPE_V2DF_V2DF;
30774 else
30775 ftype = INT_FTYPE_V4SF_V4SF;
30776 def_builtin_const (d->mask, d->name, ftype, d->code);
30779 /* SSE */
30780 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30781 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30782 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30783 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30785 /* SSE or 3DNow!A */
30786 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30787 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30788 IX86_BUILTIN_MASKMOVQ);
30790 /* SSE2 */
30791 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30792 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30794 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30795 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30796 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30797 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30799 /* SSE3. */
30800 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30801 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30802 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30803 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30805 /* AES */
30806 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30807 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30808 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30809 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30810 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30811 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30812 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30813 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30814 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30815 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30816 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30817 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30819 /* PCLMUL */
30820 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30821 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30823 /* RDRND */
30824 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30825 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30826 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30827 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30828 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30829 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30830 IX86_BUILTIN_RDRAND64_STEP);
30832 /* AVX2 */
30833 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30834 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30835 IX86_BUILTIN_GATHERSIV2DF);
30837 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30838 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30839 IX86_BUILTIN_GATHERSIV4DF);
30841 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30842 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30843 IX86_BUILTIN_GATHERDIV2DF);
30845 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30846 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30847 IX86_BUILTIN_GATHERDIV4DF);
30849 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30850 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30851 IX86_BUILTIN_GATHERSIV4SF);
30853 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30854 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30855 IX86_BUILTIN_GATHERSIV8SF);
30857 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30858 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30859 IX86_BUILTIN_GATHERDIV4SF);
30861 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30862 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30863 IX86_BUILTIN_GATHERDIV8SF);
30865 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30866 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30867 IX86_BUILTIN_GATHERSIV2DI);
30869 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30870 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30871 IX86_BUILTIN_GATHERSIV4DI);
30873 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30874 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30875 IX86_BUILTIN_GATHERDIV2DI);
30877 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30878 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30879 IX86_BUILTIN_GATHERDIV4DI);
30881 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30882 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30883 IX86_BUILTIN_GATHERSIV4SI);
30885 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30886 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30887 IX86_BUILTIN_GATHERSIV8SI);
30889 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30890 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30891 IX86_BUILTIN_GATHERDIV4SI);
30893 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30894 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30895 IX86_BUILTIN_GATHERDIV8SI);
30897 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30898 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30899 IX86_BUILTIN_GATHERALTSIV4DF);
30901 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30902 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30903 IX86_BUILTIN_GATHERALTDIV8SF);
30905 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30906 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30907 IX86_BUILTIN_GATHERALTSIV4DI);
30909 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30910 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30911 IX86_BUILTIN_GATHERALTDIV8SI);
30913 /* AVX512F */
30914 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30915 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30916 IX86_BUILTIN_GATHER3SIV16SF);
30918 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30919 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30920 IX86_BUILTIN_GATHER3SIV8DF);
30922 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30923 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30924 IX86_BUILTIN_GATHER3DIV16SF);
30926 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30927 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30928 IX86_BUILTIN_GATHER3DIV8DF);
30930 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30931 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30932 IX86_BUILTIN_GATHER3SIV16SI);
30934 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30935 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30936 IX86_BUILTIN_GATHER3SIV8DI);
30938 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30939 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30940 IX86_BUILTIN_GATHER3DIV16SI);
30942 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30943 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30944 IX86_BUILTIN_GATHER3DIV8DI);
30946 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
30947 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30948 IX86_BUILTIN_GATHER3ALTSIV8DF);
30950 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
30951 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30952 IX86_BUILTIN_GATHER3ALTDIV16SF);
30954 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
30955 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30956 IX86_BUILTIN_GATHER3ALTSIV8DI);
30958 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
30959 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30960 IX86_BUILTIN_GATHER3ALTDIV16SI);
30962 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30963 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30964 IX86_BUILTIN_SCATTERSIV16SF);
30966 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30967 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30968 IX86_BUILTIN_SCATTERSIV8DF);
30970 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30971 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30972 IX86_BUILTIN_SCATTERDIV16SF);
30974 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30975 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30976 IX86_BUILTIN_SCATTERDIV8DF);
30978 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30979 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30980 IX86_BUILTIN_SCATTERSIV16SI);
30982 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30983 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30984 IX86_BUILTIN_SCATTERSIV8DI);
30986 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30987 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30988 IX86_BUILTIN_SCATTERDIV16SI);
30990 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30991 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
30992 IX86_BUILTIN_SCATTERDIV8DI);
30994 /* AVX512PF */
30995 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30996 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
30997 IX86_BUILTIN_GATHERPFDPD);
30998 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30999 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31000 IX86_BUILTIN_GATHERPFDPS);
31001 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31002 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31003 IX86_BUILTIN_GATHERPFQPD);
31004 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31005 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31006 IX86_BUILTIN_GATHERPFQPS);
31007 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31008 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31009 IX86_BUILTIN_SCATTERPFDPD);
31010 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31011 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31012 IX86_BUILTIN_SCATTERPFDPS);
31013 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31014 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31015 IX86_BUILTIN_SCATTERPFQPD);
31016 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31017 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31018 IX86_BUILTIN_SCATTERPFQPS);
31020 /* SHA */
31021 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31022 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31023 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31024 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31025 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31026 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31027 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31028 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31029 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31030 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31031 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31032 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31033 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31034 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31036 /* RTM. */
31037 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31038 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31040 /* MMX access to the vec_init patterns. */
31041 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31042 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31044 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31045 V4HI_FTYPE_HI_HI_HI_HI,
31046 IX86_BUILTIN_VEC_INIT_V4HI);
31048 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31049 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31050 IX86_BUILTIN_VEC_INIT_V8QI);
31052 /* Access to the vec_extract patterns. */
31053 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31054 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31055 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31056 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31057 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31058 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31059 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31060 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31061 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31062 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31064 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31065 "__builtin_ia32_vec_ext_v4hi",
31066 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31068 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31069 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31071 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31072 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31074 /* Access to the vec_set patterns. */
31075 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31076 "__builtin_ia32_vec_set_v2di",
31077 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31079 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31080 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31082 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31083 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31085 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31086 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31088 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31089 "__builtin_ia32_vec_set_v4hi",
31090 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31092 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31093 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31095 /* RDSEED */
31096 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31097 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31098 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31099 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31100 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31101 "__builtin_ia32_rdseed_di_step",
31102 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31104 /* ADCX */
31105 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31106 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31107 def_builtin (OPTION_MASK_ISA_64BIT,
31108 "__builtin_ia32_addcarryx_u64",
31109 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31110 IX86_BUILTIN_ADDCARRYX64);
31112 /* Read/write FLAGS. */
31113 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31114 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31115 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31116 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31117 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31118 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31119 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31120 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31123 /* Add FMA4 multi-arg instructions.  */
31124 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31126 if (d->name == 0)
31127 continue;
31129 ftype = (enum ix86_builtin_func_type) d->flag;
31130 def_builtin_const (d->mask, d->name, ftype, d->code);
31134 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31135 to return a pointer to VERSION_DECL if the outcome of the expression
31136 formed by PREDICATE_CHAIN is true. This function will be called during
31137 version dispatch to decide which function version to execute. It returns
31138 the basic block at the end, to which more conditions can be added. */
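
/* Illustrative sketch only (not original source): assuming a single entry
   in PREDICATE_CHAIN, the GIMPLE appended to NEW_BB is roughly

     cond_var = predicate_decl (predicate_arg);
     if (cond_var > 0)
       return (void *) &version_decl;

   where predicate_decl is typically __builtin_cpu_is or
   __builtin_cpu_supports; otherwise control falls through to the next
   condition added by a later call.  */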
31140 static basic_block
31141 add_condition_to_bb (tree function_decl, tree version_decl,
31142 tree predicate_chain, basic_block new_bb)
31144 gimple return_stmt;
31145 tree convert_expr, result_var;
31146 gimple convert_stmt;
31147 gimple call_cond_stmt;
31148 gimple if_else_stmt;
31150 basic_block bb1, bb2, bb3;
31151 edge e12, e23;
31153 tree cond_var, and_expr_var = NULL_TREE;
31154 gimple_seq gseq;
31156 tree predicate_decl, predicate_arg;
31158 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31160 gcc_assert (new_bb != NULL);
31161 gseq = bb_seq (new_bb);
31164 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31165 build_fold_addr_expr (version_decl));
31166 result_var = create_tmp_var (ptr_type_node, NULL);
31167 convert_stmt = gimple_build_assign (result_var, convert_expr);
31168 return_stmt = gimple_build_return (result_var);
31170 if (predicate_chain == NULL_TREE)
31172 gimple_seq_add_stmt (&gseq, convert_stmt);
31173 gimple_seq_add_stmt (&gseq, return_stmt);
31174 set_bb_seq (new_bb, gseq);
31175 gimple_set_bb (convert_stmt, new_bb);
31176 gimple_set_bb (return_stmt, new_bb);
31177 pop_cfun ();
31178 return new_bb;
31181 while (predicate_chain != NULL)
31183 cond_var = create_tmp_var (integer_type_node, NULL);
31184 predicate_decl = TREE_PURPOSE (predicate_chain);
31185 predicate_arg = TREE_VALUE (predicate_chain);
31186 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31187 gimple_call_set_lhs (call_cond_stmt, cond_var);
31189 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31190 gimple_set_bb (call_cond_stmt, new_bb);
31191 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31193 predicate_chain = TREE_CHAIN (predicate_chain);
31195 if (and_expr_var == NULL)
31196 and_expr_var = cond_var;
31197 else
31199 gimple assign_stmt;
31200 /* Use MIN_EXPR to check whether any integer is zero:
31201 and_expr_var = min_expr <cond_var, and_expr_var>.  */
31202 assign_stmt = gimple_build_assign (and_expr_var,
31203 build2 (MIN_EXPR, integer_type_node,
31204 cond_var, and_expr_var));
31206 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31207 gimple_set_bb (assign_stmt, new_bb);
31208 gimple_seq_add_stmt (&gseq, assign_stmt);
31212 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31213 integer_zero_node,
31214 NULL_TREE, NULL_TREE);
31215 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31216 gimple_set_bb (if_else_stmt, new_bb);
31217 gimple_seq_add_stmt (&gseq, if_else_stmt);
31219 gimple_seq_add_stmt (&gseq, convert_stmt);
31220 gimple_seq_add_stmt (&gseq, return_stmt);
31221 set_bb_seq (new_bb, gseq);
31223 bb1 = new_bb;
31224 e12 = split_block (bb1, if_else_stmt);
31225 bb2 = e12->dest;
31226 e12->flags &= ~EDGE_FALLTHRU;
31227 e12->flags |= EDGE_TRUE_VALUE;
31229 e23 = split_block (bb2, return_stmt);
31231 gimple_set_bb (convert_stmt, bb2);
31232 gimple_set_bb (return_stmt, bb2);
31234 bb3 = e23->dest;
31235 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31237 remove_edge (e23);
31238 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31240 pop_cfun ();
31242 return bb3;
31245 /* This parses the attribute arguments to target in DECL and determines
31246 the right builtin to use to match the platform specification.
31247 It returns the priority value for this version decl. If PREDICATE_LIST
31248 is not NULL, it stores the list of cpu features that need to be checked
31249 before dispatching this function. */
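
/* For example (hypothetical user code, illustrative only), a version
   declared as

     __attribute__ ((target ("sse4.2"))) int foo (void);

   yields a PREDICATE_LIST containing a single
   __builtin_cpu_supports ("sse4.2") check and returns the priority
   P_SSE4_2 from the feature table below.  */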
31251 static unsigned int
31252 get_builtin_code_for_version (tree decl, tree *predicate_list)
31254 tree attrs;
31255 struct cl_target_option cur_target;
31256 tree target_node;
31257 struct cl_target_option *new_target;
31258 const char *arg_str = NULL;
31259 const char *attrs_str = NULL;
31260 char *tok_str = NULL;
31261 char *token;
31263 /* Priority of i386 features, greater value is higher priority. This is
31264 used to decide the order in which function dispatch must happen. For
31265 instance, a version specialized for SSE4.2 should be checked for dispatch
31266 before a version for SSE3, as SSE4.2 implies SSE3. */
31267 enum feature_priority
31269 P_ZERO = 0,
31270 P_MMX,
31271 P_SSE,
31272 P_SSE2,
31273 P_SSE3,
31274 P_SSSE3,
31275 P_PROC_SSSE3,
31276 P_SSE4_A,
31277 P_PROC_SSE4_A,
31278 P_SSE4_1,
31279 P_SSE4_2,
31280 P_PROC_SSE4_2,
31281 P_POPCNT,
31282 P_AVX,
31283 P_PROC_AVX,
31284 P_FMA4,
31285 P_XOP,
31286 P_PROC_XOP,
31287 P_FMA,
31288 P_PROC_FMA,
31289 P_AVX2,
31290 P_PROC_AVX2
31293 enum feature_priority priority = P_ZERO;
31295 /* These are the target attribute strings for which a dispatcher is
31296 available, from fold_builtin_cpu. */
31298 static struct _feature_list
31300 const char *const name;
31301 const enum feature_priority priority;
31303 const feature_list[] =
31305 {"mmx", P_MMX},
31306 {"sse", P_SSE},
31307 {"sse2", P_SSE2},
31308 {"sse3", P_SSE3},
31309 {"sse4a", P_SSE4_A},
31310 {"ssse3", P_SSSE3},
31311 {"sse4.1", P_SSE4_1},
31312 {"sse4.2", P_SSE4_2},
31313 {"popcnt", P_POPCNT},
31314 {"avx", P_AVX},
31315 {"fma4", P_FMA4},
31316 {"xop", P_XOP},
31317 {"fma", P_FMA},
31318 {"avx2", P_AVX2}
31322 static unsigned int NUM_FEATURES
31323 = sizeof (feature_list) / sizeof (struct _feature_list);
31325 unsigned int i;
31327 tree predicate_chain = NULL_TREE;
31328 tree predicate_decl, predicate_arg;
31330 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31331 gcc_assert (attrs != NULL);
31333 attrs = TREE_VALUE (TREE_VALUE (attrs));
31335 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31336 attrs_str = TREE_STRING_POINTER (attrs);
31338 /* Return priority zero for default function. */
31339 if (strcmp (attrs_str, "default") == 0)
31340 return 0;
31342 /* Handle arch= if specified. For priority, set it to be 1 more than
31343 the best instruction set the processor can handle. For instance, if
31344 there is a version for atom and a version for ssse3 (the highest ISA
31345 priority for atom), the atom version must be checked for dispatch
31346 before the ssse3 version. */
31347 if (strstr (attrs_str, "arch=") != NULL)
31349 cl_target_option_save (&cur_target, &global_options);
31350 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31351 &global_options_set);
31353 gcc_assert (target_node);
31354 new_target = TREE_TARGET_OPTION (target_node);
31355 gcc_assert (new_target);
31357 if (new_target->arch_specified && new_target->arch > 0)
31359 switch (new_target->arch)
31361 case PROCESSOR_CORE2:
31362 arg_str = "core2";
31363 priority = P_PROC_SSSE3;
31364 break;
31365 case PROCESSOR_NEHALEM:
31366 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31367 arg_str = "westmere";
31368 else
31369 /* We translate "arch=corei7" and "arch=nehalem" to
31370 "corei7" so that it will be mapped to M_INTEL_COREI7
31371 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31372 arg_str = "corei7";
31373 priority = P_PROC_SSE4_2;
31374 break;
31375 case PROCESSOR_SANDYBRIDGE:
31376 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31377 arg_str = "ivybridge";
31378 else
31379 arg_str = "sandybridge";
31380 priority = P_PROC_AVX;
31381 break;
31382 case PROCESSOR_HASWELL:
31383 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31384 arg_str = "broadwell";
31385 else
31386 arg_str = "haswell";
31387 priority = P_PROC_AVX2;
31388 break;
31389 case PROCESSOR_BONNELL:
31390 arg_str = "bonnell";
31391 priority = P_PROC_SSSE3;
31392 break;
31393 case PROCESSOR_SILVERMONT:
31394 arg_str = "silvermont";
31395 priority = P_PROC_SSE4_2;
31396 break;
31397 case PROCESSOR_AMDFAM10:
31398 arg_str = "amdfam10h";
31399 priority = P_PROC_SSE4_A;
31400 break;
31401 case PROCESSOR_BTVER1:
31402 arg_str = "btver1";
31403 priority = P_PROC_SSE4_A;
31404 break;
31405 case PROCESSOR_BTVER2:
31406 arg_str = "btver2";
31407 priority = P_PROC_AVX;
31408 break;
31409 case PROCESSOR_BDVER1:
31410 arg_str = "bdver1";
31411 priority = P_PROC_XOP;
31412 break;
31413 case PROCESSOR_BDVER2:
31414 arg_str = "bdver2";
31415 priority = P_PROC_FMA;
31416 break;
31417 case PROCESSOR_BDVER3:
31418 arg_str = "bdver3";
31419 priority = P_PROC_FMA;
31420 break;
31421 case PROCESSOR_BDVER4:
31422 arg_str = "bdver4";
31423 priority = P_PROC_AVX2;
31424 break;
31428 cl_target_option_restore (&global_options, &cur_target);
31430 if (predicate_list && arg_str == NULL)
31432 error_at (DECL_SOURCE_LOCATION (decl),
31433 "No dispatcher found for the versioning attributes");
31434 return 0;
31437 if (predicate_list)
31439 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31440 /* For a C string literal the length includes the trailing NULL. */
31441 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31442 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31443 predicate_chain);
31447 /* Process feature name. */
31448 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31449 strcpy (tok_str, attrs_str);
31450 token = strtok (tok_str, ",");
31451 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31453 while (token != NULL)
31455 /* Do not process "arch=" */
31456 if (strncmp (token, "arch=", 5) == 0)
31458 token = strtok (NULL, ",");
31459 continue;
31461 for (i = 0; i < NUM_FEATURES; ++i)
31463 if (strcmp (token, feature_list[i].name) == 0)
31465 if (predicate_list)
31467 predicate_arg = build_string_literal (
31468 strlen (feature_list[i].name) + 1,
31469 feature_list[i].name);
31470 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31471 predicate_chain);
31473 /* Find the maximum priority feature. */
31474 if (feature_list[i].priority > priority)
31475 priority = feature_list[i].priority;
31477 break;
31480 if (predicate_list && i == NUM_FEATURES)
31482 error_at (DECL_SOURCE_LOCATION (decl),
31483 "No dispatcher found for %s", token);
31484 return 0;
31486 token = strtok (NULL, ",");
31488 free (tok_str);
31490 if (predicate_list && predicate_chain == NULL_TREE)
31492 error_at (DECL_SOURCE_LOCATION (decl),
31493 "No dispatcher found for the versioning attributes : %s",
31494 attrs_str);
31495 return 0;
31497 else if (predicate_list)
31499 predicate_chain = nreverse (predicate_chain);
31500 *predicate_list = predicate_chain;
31503 return priority;
31506 /* This compares the priority of target features in function DECL1
31507 and DECL2. It returns positive value if DECL1 is higher priority,
31508 negative value if DECL2 is higher priority and 0 if they are the
31509 same. */
31511 static int
31512 ix86_compare_version_priority (tree decl1, tree decl2)
31514 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31515 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31517 return (int)priority1 - (int)priority2;
31520 /* V1 and V2 point to function versions with different priorities
31521 based on the target ISA. This function compares their priorities. */
31523 static int
31524 feature_compare (const void *v1, const void *v2)
31526 typedef struct _function_version_info
31528 tree version_decl;
31529 tree predicate_chain;
31530 unsigned int dispatch_priority;
31531 } function_version_info;
31533 const function_version_info c1 = *(const function_version_info *)v1;
31534 const function_version_info c2 = *(const function_version_info *)v2;
31535 return (c2.dispatch_priority - c1.dispatch_priority);
31538 /* This function generates the dispatch function for
31539 multi-versioned functions. DISPATCH_DECL is the function which will
31540 contain the dispatch logic. FNDECLS are the function choices for
31541 dispatch, passed as a vector. EMPTY_BB is the basic block pointer
31542 in DISPATCH_DECL in which the dispatch code is generated. */
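
/* Sketch only: the resolver body produced here has roughly this shape:

     __builtin_cpu_init ();
     if (predicate chain of the highest-priority version holds)
       return &version_1;
     if (predicate chain of the next version holds)
       return &version_2;
     ...
     return &default_version;
*/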
31544 static int
31545 dispatch_function_versions (tree dispatch_decl,
31546 void *fndecls_p,
31547 basic_block *empty_bb)
31549 tree default_decl;
31550 gimple ifunc_cpu_init_stmt;
31551 gimple_seq gseq;
31552 int ix;
31553 tree ele;
31554 vec<tree> *fndecls;
31555 unsigned int num_versions = 0;
31556 unsigned int actual_versions = 0;
31557 unsigned int i;
31559 struct _function_version_info
31561 tree version_decl;
31562 tree predicate_chain;
31563 unsigned int dispatch_priority;
31564 }*function_version_info;
31566 gcc_assert (dispatch_decl != NULL
31567 && fndecls_p != NULL
31568 && empty_bb != NULL);
31570 /* fndecls_p is actually a vector. */
31571 fndecls = static_cast<vec<tree> *> (fndecls_p);
31573 /* At least one more version other than the default. */
31574 num_versions = fndecls->length ();
31575 gcc_assert (num_versions >= 2);
31577 function_version_info = (struct _function_version_info *)
31578 XNEWVEC (struct _function_version_info, (num_versions - 1));
31580 /* The first version in the vector is the default decl. */
31581 default_decl = (*fndecls)[0];
31583 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31585 gseq = bb_seq (*empty_bb);
31586 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31587 constructors, so explicitly call __builtin_cpu_init here. */
31588 ifunc_cpu_init_stmt = gimple_build_call_vec (
31589 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31590 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31591 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31592 set_bb_seq (*empty_bb, gseq);
31594 pop_cfun ();
31597 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31599 tree version_decl = ele;
31600 tree predicate_chain = NULL_TREE;
31601 unsigned int priority;
31602 /* Get attribute string, parse it and find the right predicate decl.
31603 The predicate function could be a lengthy combination of many
31604 features, like arch-type and various isa-variants. */
31605 priority = get_builtin_code_for_version (version_decl,
31606 &predicate_chain);
31608 if (predicate_chain == NULL_TREE)
31609 continue;
31611 function_version_info [actual_versions].version_decl = version_decl;
31612 function_version_info [actual_versions].predicate_chain
31613 = predicate_chain;
31614 function_version_info [actual_versions].dispatch_priority = priority;
31615 actual_versions++;
31618 /* Sort the versions according to descending order of dispatch priority. The
31619 priority is based on the ISA. This is not a perfect solution. There
31620 could still be ambiguity. If more than one function version is suitable
31621 to execute, which one should be dispatched? In future, allow the user
31622 to specify a dispatch priority next to the version. */
31623 qsort (function_version_info, actual_versions,
31624 sizeof (struct _function_version_info), feature_compare);
31626 for (i = 0; i < actual_versions; ++i)
31627 *empty_bb = add_condition_to_bb (dispatch_decl,
31628 function_version_info[i].version_decl,
31629 function_version_info[i].predicate_chain,
31630 *empty_bb);
31632 /* dispatch default version at the end. */
31633 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31634 NULL, *empty_bb);
31636 free (function_version_info);
31637 return 0;
31640 /* Comparator function to be used in qsort routine to sort attribute
31641 specification strings to "target". */
31643 static int
31644 attr_strcmp (const void *v1, const void *v2)
31646 const char *c1 = *(char *const*)v1;
31647 const char *c2 = *(char *const*)v2;
31648 return strcmp (c1, c2);
31651 /* ARGLIST is the argument to target attribute. This function tokenizes
31652 the comma separated arguments, sorts them and returns a string which
31653 is a unique identifier for the comma separated arguments. It also
31654 replaces non-identifier characters "=,-" with "_". */
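
/* Worked example (illustrative): for the attribute arguments
   "sse4.2,arch=slm" this produces the tokens "sse4.2" and "arch_slm"
   (after '=' is rewritten to '_'), sorts them, and returns
   "arch_slm_sse4.2".  */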
31656 static char *
31657 sorted_attr_string (tree arglist)
31659 tree arg;
31660 size_t str_len_sum = 0;
31661 char **args = NULL;
31662 char *attr_str, *ret_str;
31663 char *attr = NULL;
31664 unsigned int argnum = 1;
31665 unsigned int i;
31667 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31669 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31670 size_t len = strlen (str);
31671 str_len_sum += len + 1;
31672 if (arg != arglist)
31673 argnum++;
31674 for (i = 0; i < strlen (str); i++)
31675 if (str[i] == ',')
31676 argnum++;
31679 attr_str = XNEWVEC (char, str_len_sum);
31680 str_len_sum = 0;
31681 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31683 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31684 size_t len = strlen (str);
31685 memcpy (attr_str + str_len_sum, str, len);
31686 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31687 str_len_sum += len + 1;
31690 /* Replace "=,-" with "_". */
31691 for (i = 0; i < strlen (attr_str); i++)
31692 if (attr_str[i] == '=' || attr_str[i]== '-')
31693 attr_str[i] = '_';
31695 if (argnum == 1)
31696 return attr_str;
31698 args = XNEWVEC (char *, argnum);
31700 i = 0;
31701 attr = strtok (attr_str, ",");
31702 while (attr != NULL)
31704 args[i] = attr;
31705 i++;
31706 attr = strtok (NULL, ",");
31709 qsort (args, argnum, sizeof (char *), attr_strcmp);
31711 ret_str = XNEWVEC (char, str_len_sum);
31712 str_len_sum = 0;
31713 for (i = 0; i < argnum; i++)
31715 size_t len = strlen (args[i]);
31716 memcpy (ret_str + str_len_sum, args[i], len);
31717 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31718 str_len_sum += len + 1;
31721 XDELETEVEC (args);
31722 XDELETEVEC (attr_str);
31723 return ret_str;
31726 /* This function changes the assembler name for functions that are
31727 versions. If DECL is a function version and has a "target"
31728 attribute, it appends the attribute string to its assembler name. */
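
/* Continuing the example above (illustrative), a version of foo declared
   with target ("sse4.2,arch=slm") gets the assembler name
   "foo.arch_slm_sse4.2", while the "default" version keeps its original
   name.  */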
31730 static tree
31731 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31733 tree version_attr;
31734 const char *orig_name, *version_string;
31735 char *attr_str, *assembler_name;
31737 if (DECL_DECLARED_INLINE_P (decl)
31738 && lookup_attribute ("gnu_inline",
31739 DECL_ATTRIBUTES (decl)))
31740 error_at (DECL_SOURCE_LOCATION (decl),
31741 "Function versions cannot be marked as gnu_inline,"
31742 " bodies have to be generated");
31744 if (DECL_VIRTUAL_P (decl)
31745 || DECL_VINDEX (decl))
31746 sorry ("Virtual function multiversioning not supported");
31748 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31750 /* target attribute string cannot be NULL. */
31751 gcc_assert (version_attr != NULL_TREE);
31753 orig_name = IDENTIFIER_POINTER (id);
31754 version_string
31755 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31757 if (strcmp (version_string, "default") == 0)
31758 return id;
31760 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31761 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31763 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31765 /* Allow assembler name to be modified if already set. */
31766 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31767 SET_DECL_RTL (decl, NULL);
31769 tree ret = get_identifier (assembler_name);
31770 XDELETEVEC (attr_str);
31771 XDELETEVEC (assembler_name);
31772 return ret;
31775 /* This function returns true if FN1 and FN2 are versions of the same function,
31776 that is, the target strings of the function decls are different. This assumes
31777 that FN1 and FN2 have the same signature. */
31779 static bool
31780 ix86_function_versions (tree fn1, tree fn2)
31782 tree attr1, attr2;
31783 char *target1, *target2;
31784 bool result;
31786 if (TREE_CODE (fn1) != FUNCTION_DECL
31787 || TREE_CODE (fn2) != FUNCTION_DECL)
31788 return false;
31790 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31791 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31793 /* At least one function decl should have the target attribute specified. */
31794 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31795 return false;
31797 /* Diagnose missing target attribute if one of the decls is already
31798 multi-versioned. */
31799 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31801 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31803 if (attr2 != NULL_TREE)
31805 tree tem = fn1;
31806 fn1 = fn2;
31807 fn2 = tem;
31808 attr1 = attr2;
31810 error_at (DECL_SOURCE_LOCATION (fn2),
31811 "missing %<target%> attribute for multi-versioned %D",
31812 fn2);
31813 inform (DECL_SOURCE_LOCATION (fn1),
31814 "previous declaration of %D", fn1);
31815 /* Prevent diagnosing of the same error multiple times. */
31816 DECL_ATTRIBUTES (fn2)
31817 = tree_cons (get_identifier ("target"),
31818 copy_node (TREE_VALUE (attr1)),
31819 DECL_ATTRIBUTES (fn2));
31821 return false;
31824 target1 = sorted_attr_string (TREE_VALUE (attr1));
31825 target2 = sorted_attr_string (TREE_VALUE (attr2));
31827 /* The sorted target strings must be different for fn1 and fn2
31828 to be versions. */
31829 if (strcmp (target1, target2) == 0)
31830 result = false;
31831 else
31832 result = true;
31834 XDELETEVEC (target1);
31835 XDELETEVEC (target2);
31837 return result;
31840 static tree
31841 ix86_mangle_decl_assembler_name (tree decl, tree id)
31843 /* For function version, add the target suffix to the assembler name. */
31844 if (TREE_CODE (decl) == FUNCTION_DECL
31845 && DECL_FUNCTION_VERSIONED (decl))
31846 id = ix86_mangle_function_version_assembler_name (decl, id);
31847 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31848 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31849 #endif
31851 return id;
31854 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31855 is true, append the full path name of the source file. */
31857 static char *
31858 make_name (tree decl, const char *suffix, bool make_unique)
31860 char *global_var_name;
31861 int name_len;
31862 const char *name;
31863 const char *unique_name = NULL;
31865 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31867 /* Get a unique name that can be used globally without any chances
31868 of collision at link time. */
31869 if (make_unique)
31870 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31872 name_len = strlen (name) + strlen (suffix) + 2;
31874 if (make_unique)
31875 name_len += strlen (unique_name) + 1;
31876 global_var_name = XNEWVEC (char, name_len);
31878 /* Use '.' to concatenate names as it is demangler friendly. */
31879 if (make_unique)
31880 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31881 suffix);
31882 else
31883 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31885 return global_var_name;
31888 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31890 /* Make a dispatcher declaration for the multi-versioned function DECL.
31891 Calls to DECL function will be replaced with calls to the dispatcher
31892 by the front-end. Return the decl created. */
31894 static tree
31895 make_dispatcher_decl (const tree decl)
31897 tree func_decl;
31898 char *func_name;
31899 tree fn_type, func_type;
31900 bool is_uniq = false;
31902 if (TREE_PUBLIC (decl) == 0)
31903 is_uniq = true;
31905 func_name = make_name (decl, "ifunc", is_uniq);
31907 fn_type = TREE_TYPE (decl);
31908 func_type = build_function_type (TREE_TYPE (fn_type),
31909 TYPE_ARG_TYPES (fn_type));
31911 func_decl = build_fn_decl (func_name, func_type);
31912 XDELETEVEC (func_name);
31913 TREE_USED (func_decl) = 1;
31914 DECL_CONTEXT (func_decl) = NULL_TREE;
31915 DECL_INITIAL (func_decl) = error_mark_node;
31916 DECL_ARTIFICIAL (func_decl) = 1;
31917 /* Mark this func as external, the resolver will flip it again if
31918 it gets generated. */
31919 DECL_EXTERNAL (func_decl) = 1;
31920 /* This will be an IFUNC; IFUNCs have to be externally visible. */
31921 TREE_PUBLIC (func_decl) = 1;
31923 return func_decl;
31926 #endif
31928 /* Returns true if DECL is multi-versioned and is the default function,
31929 that is, it is not tagged with a target-specific optimization. */
31931 static bool
31932 is_function_default_version (const tree decl)
31934 if (TREE_CODE (decl) != FUNCTION_DECL
31935 || !DECL_FUNCTION_VERSIONED (decl))
31936 return false;
31937 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31938 gcc_assert (attr);
31939 attr = TREE_VALUE (TREE_VALUE (attr));
31940 return (TREE_CODE (attr) == STRING_CST
31941 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31944 /* Make a dispatcher declaration for the multi-versioned function DECL.
31945 Calls to DECL function will be replaced with calls to the dispatcher
31946 by the front-end. Returns the decl of the dispatcher function. */
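
/* End to end (hypothetical C++ user code, sketch only):

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx2")))    int foo (void) { return 2; }

   Calls to foo are redirected to the dispatcher returned here; on targets
   with IFUNC support the dispatcher is an ifunc whose resolver selects a
   version at load time using the __builtin_cpu_* checks built above.  */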
31948 static tree
31949 ix86_get_function_versions_dispatcher (void *decl)
31951 tree fn = (tree) decl;
31952 struct cgraph_node *node = NULL;
31953 struct cgraph_node *default_node = NULL;
31954 struct cgraph_function_version_info *node_v = NULL;
31955 struct cgraph_function_version_info *first_v = NULL;
31957 tree dispatch_decl = NULL;
31959 struct cgraph_function_version_info *default_version_info = NULL;
31961 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31963 node = cgraph_get_node (fn);
31964 gcc_assert (node != NULL);
31966 node_v = get_cgraph_node_version (node);
31967 gcc_assert (node_v != NULL);
31969 if (node_v->dispatcher_resolver != NULL)
31970 return node_v->dispatcher_resolver;
31972 /* Find the default version and make it the first node. */
31973 first_v = node_v;
31974 /* Go to the beginning of the chain. */
31975 while (first_v->prev != NULL)
31976 first_v = first_v->prev;
31977 default_version_info = first_v;
31978 while (default_version_info != NULL)
31980 if (is_function_default_version
31981 (default_version_info->this_node->decl))
31982 break;
31983 default_version_info = default_version_info->next;
31986 /* If there is no default node, just return NULL. */
31987 if (default_version_info == NULL)
31988 return NULL;
31990 /* Make default info the first node. */
31991 if (first_v != default_version_info)
31993 default_version_info->prev->next = default_version_info->next;
31994 if (default_version_info->next)
31995 default_version_info->next->prev = default_version_info->prev;
31996 first_v->prev = default_version_info;
31997 default_version_info->next = first_v;
31998 default_version_info->prev = NULL;
32001 default_node = default_version_info->this_node;
32003 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32004 if (targetm.has_ifunc_p ())
32006 struct cgraph_function_version_info *it_v = NULL;
32007 struct cgraph_node *dispatcher_node = NULL;
32008 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32010 /* Right now, the dispatching is done via ifunc. */
32011 dispatch_decl = make_dispatcher_decl (default_node->decl);
32013 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32014 gcc_assert (dispatcher_node != NULL);
32015 dispatcher_node->dispatcher_function = 1;
32016 dispatcher_version_info
32017 = insert_new_cgraph_node_version (dispatcher_node);
32018 dispatcher_version_info->next = default_version_info;
32019 dispatcher_node->definition = 1;
32021 /* Set the dispatcher for all the versions. */
32022 it_v = default_version_info;
32023 while (it_v != NULL)
32025 it_v->dispatcher_resolver = dispatch_decl;
32026 it_v = it_v->next;
32029 else
32030 #endif
32032 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32033 "multiversioning needs ifunc which is not supported "
32034 "on this target");
32037 return dispatch_decl;
32040 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32041 it to CHAIN. */
32043 static tree
32044 make_attribute (const char *name, const char *arg_name, tree chain)
32046 tree attr_name;
32047 tree attr_arg_name;
32048 tree attr_args;
32049 tree attr;
32051 attr_name = get_identifier (name);
32052 attr_arg_name = build_string (strlen (arg_name), arg_name);
32053 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32054 attr = tree_cons (attr_name, attr_args, chain);
32055 return attr;
32058 /* Make the resolver function decl to dispatch the versions of
32059 a multi-versioned function, DEFAULT_DECL. Create an
32060 empty basic block in the resolver and store the pointer in
32061 EMPTY_BB. Return the decl of the resolver function. */
32063 static tree
32064 make_resolver_func (const tree default_decl,
32065 const tree dispatch_decl,
32066 basic_block *empty_bb)
32068 char *resolver_name;
32069 tree decl, type, decl_name, t;
32070 bool is_uniq = false;
32072 /* IFUNCs have to be globally visible. So, if the default_decl is
32073 not, then the name of the IFUNC should be made unique. */
32074 if (TREE_PUBLIC (default_decl) == 0)
32075 is_uniq = true;
32077 /* Append the filename to the resolver function if the versions are
32078 not externally visible. This is because the resolver function has
32079 to be externally visible for the loader to find it. So, appending
32080 the filename will prevent conflicts with a resolver function from
32081 another module which is based on the same version name. */
32082 resolver_name = make_name (default_decl, "resolver", is_uniq);
32084 /* The resolver function should return a (void *). */
32085 type = build_function_type_list (ptr_type_node, NULL_TREE);
32087 decl = build_fn_decl (resolver_name, type);
32088 decl_name = get_identifier (resolver_name);
32089 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32091 DECL_NAME (decl) = decl_name;
32092 TREE_USED (decl) = 1;
32093 DECL_ARTIFICIAL (decl) = 1;
32094 DECL_IGNORED_P (decl) = 0;
32095 /* IFUNC resolvers have to be externally visible. */
32096 TREE_PUBLIC (decl) = 1;
32097 DECL_UNINLINABLE (decl) = 1;
32099 /* Resolver is not external, body is generated. */
32100 DECL_EXTERNAL (decl) = 0;
32101 DECL_EXTERNAL (dispatch_decl) = 0;
32103 DECL_CONTEXT (decl) = NULL_TREE;
32104 DECL_INITIAL (decl) = make_node (BLOCK);
32105 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32107 if (DECL_COMDAT_GROUP (default_decl)
32108 || TREE_PUBLIC (default_decl))
32110 /* In this case, each translation unit with a call to this
32111 versioned function will put out a resolver. Ensure it
32112 is comdat to keep just one copy. */
32113 DECL_COMDAT (decl) = 1;
32114 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32116 /* Build result decl and add to function_decl. */
32117 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32118 DECL_ARTIFICIAL (t) = 1;
32119 DECL_IGNORED_P (t) = 1;
32120 DECL_RESULT (decl) = t;
32122 gimplify_function_tree (decl);
32123 push_cfun (DECL_STRUCT_FUNCTION (decl));
32124 *empty_bb = init_lowered_empty_function (decl, false);
32126 cgraph_add_new_function (decl, true);
32127 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32129 pop_cfun ();
32131 gcc_assert (dispatch_decl != NULL);
32132 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32133 DECL_ATTRIBUTES (dispatch_decl)
32134 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32136 /* Create the alias for dispatch to resolver here. */
32137 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32138 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32139 XDELETEVEC (resolver_name);
32140 return decl;
32143 /* Generate the dispatching code body to dispatch multi-versioned function
32144 DECL. The target hook is called to process the "target" attributes and
32145 provide the code to dispatch the right function at run-time. NODE points
32146 to the dispatcher decl whose body will be created. */
32148 static tree
32149 ix86_generate_version_dispatcher_body (void *node_p)
32151 tree resolver_decl;
32152 basic_block empty_bb;
32153 tree default_ver_decl;
32154 struct cgraph_node *versn;
32155 struct cgraph_node *node;
32157 struct cgraph_function_version_info *node_version_info = NULL;
32158 struct cgraph_function_version_info *versn_info = NULL;
32160 node = (cgraph_node *)node_p;
32162 node_version_info = get_cgraph_node_version (node);
32163 gcc_assert (node->dispatcher_function
32164 && node_version_info != NULL);
32166 if (node_version_info->dispatcher_resolver)
32167 return node_version_info->dispatcher_resolver;
32169 /* The first version in the chain corresponds to the default version. */
32170 default_ver_decl = node_version_info->next->this_node->decl;
32172 /* node is going to be an alias, so remove the finalized bit. */
32173 node->definition = false;
32175 resolver_decl = make_resolver_func (default_ver_decl,
32176 node->decl, &empty_bb);
32178 node_version_info->dispatcher_resolver = resolver_decl;
32180 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32182 auto_vec<tree, 2> fn_ver_vec;
32184 for (versn_info = node_version_info->next; versn_info;
32185 versn_info = versn_info->next)
32187 versn = versn_info->this_node;
32188 /* Check for virtual functions here again, as by this time it should
32189 have been determined if this function needs a vtable index or
32190 not. This happens for methods in derived classes that override
32191 virtual methods in base classes but are not explicitly marked as
32192 virtual. */
32193 if (DECL_VINDEX (versn->decl))
32194 sorry ("Virtual function multiversioning not supported");
32196 fn_ver_vec.safe_push (versn->decl);
32199 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32200 rebuild_cgraph_edges ();
32201 pop_cfun ();
32202 return resolver_decl;
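/* Rough illustration (an assumption, heavily simplified): the resolver whose
   body is filled in by dispatch_function_versions behaves as if it were

     static void *
     foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return foo_avx2;
       return foo_default;
     }

   with foo_resolver, foo_avx2 and foo_default as placeholder names and the
   checks derived from each version's "target" attribute.  */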
32204 /* This builds the processor_model struct type defined in
32205 libgcc/config/i386/cpuinfo.c */
32207 static tree
32208 build_processor_model_struct (void)
32210 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32211 "__cpu_features"};
32212 tree field = NULL_TREE, field_chain = NULL_TREE;
32213 int i;
32214 tree type = make_node (RECORD_TYPE);
32216 /* The first 3 fields are unsigned int. */
32217 for (i = 0; i < 3; ++i)
32219 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32220 get_identifier (field_name[i]), unsigned_type_node);
32221 if (field_chain != NULL_TREE)
32222 DECL_CHAIN (field) = field_chain;
32223 field_chain = field;
32226 /* The last field is an array of unsigned integers of size one. */
32227 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32228 get_identifier (field_name[3]),
32229 build_array_type (unsigned_type_node,
32230 build_index_type (size_one_node)));
32231 if (field_chain != NULL_TREE)
32232 DECL_CHAIN (field) = field_chain;
32233 field_chain = field;
32235 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32236 return type;
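/* For reference, the record built here mirrors the definition used by
   libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */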
32239 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32241 static tree
32242 make_var_decl (tree type, const char *name)
32244 tree new_decl;
32246 new_decl = build_decl (UNKNOWN_LOCATION,
32247 VAR_DECL,
32248 get_identifier(name),
32249 type);
32251 DECL_EXTERNAL (new_decl) = 1;
32252 TREE_STATIC (new_decl) = 1;
32253 TREE_PUBLIC (new_decl) = 1;
32254 DECL_INITIAL (new_decl) = 0;
32255 DECL_ARTIFICIAL (new_decl) = 0;
32256 DECL_PRESERVE_P (new_decl) = 1;
32258 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32259 assemble_variable (new_decl, 0, 0, 0);
32261 return new_decl;
32264 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32265 into a check against the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c. */
32267 static tree
32268 fold_builtin_cpu (tree fndecl, tree *args)
32270 unsigned int i;
32271 enum ix86_builtins fn_code = (enum ix86_builtins)
32272 DECL_FUNCTION_CODE (fndecl);
32273 tree param_string_cst = NULL;
32275 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32276 enum processor_features
32278 F_CMOV = 0,
32279 F_MMX,
32280 F_POPCNT,
32281 F_SSE,
32282 F_SSE2,
32283 F_SSE3,
32284 F_SSSE3,
32285 F_SSE4_1,
32286 F_SSE4_2,
32287 F_AVX,
32288 F_AVX2,
32289 F_SSE4_A,
32290 F_FMA4,
32291 F_XOP,
32292 F_FMA,
32293 F_MAX
32296 /* These are the values for vendor types and cpu types and subtypes
32297 in cpuinfo.c. Cpu types and subtypes should have the corresponding
32298 start value subtracted before comparison. */
32299 enum processor_model
32301 M_INTEL = 1,
32302 M_AMD,
32303 M_CPU_TYPE_START,
32304 M_INTEL_BONNELL,
32305 M_INTEL_CORE2,
32306 M_INTEL_COREI7,
32307 M_AMDFAM10H,
32308 M_AMDFAM15H,
32309 M_INTEL_SILVERMONT,
32310 M_AMD_BTVER1,
32311 M_AMD_BTVER2,
32312 M_CPU_SUBTYPE_START,
32313 M_INTEL_COREI7_NEHALEM,
32314 M_INTEL_COREI7_WESTMERE,
32315 M_INTEL_COREI7_SANDYBRIDGE,
32316 M_AMDFAM10H_BARCELONA,
32317 M_AMDFAM10H_SHANGHAI,
32318 M_AMDFAM10H_ISTANBUL,
32319 M_AMDFAM15H_BDVER1,
32320 M_AMDFAM15H_BDVER2,
32321 M_AMDFAM15H_BDVER3,
32322 M_AMDFAM15H_BDVER4,
32323 M_INTEL_COREI7_IVYBRIDGE,
32324 M_INTEL_COREI7_HASWELL
32327 static struct _arch_names_table
32329 const char *const name;
32330 const enum processor_model model;
32332 const arch_names_table[] =
32334 {"amd", M_AMD},
32335 {"intel", M_INTEL},
32336 {"atom", M_INTEL_BONNELL},
32337 {"slm", M_INTEL_SILVERMONT},
32338 {"core2", M_INTEL_CORE2},
32339 {"corei7", M_INTEL_COREI7},
32340 {"nehalem", M_INTEL_COREI7_NEHALEM},
32341 {"westmere", M_INTEL_COREI7_WESTMERE},
32342 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32343 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32344 {"haswell", M_INTEL_COREI7_HASWELL},
32345 {"bonnell", M_INTEL_BONNELL},
32346 {"silvermont", M_INTEL_SILVERMONT},
32347 {"amdfam10h", M_AMDFAM10H},
32348 {"barcelona", M_AMDFAM10H_BARCELONA},
32349 {"shanghai", M_AMDFAM10H_SHANGHAI},
32350 {"istanbul", M_AMDFAM10H_ISTANBUL},
32351 {"btver1", M_AMD_BTVER1},
32352 {"amdfam15h", M_AMDFAM15H},
32353 {"bdver1", M_AMDFAM15H_BDVER1},
32354 {"bdver2", M_AMDFAM15H_BDVER2},
32355 {"bdver3", M_AMDFAM15H_BDVER3},
32356 {"bdver4", M_AMDFAM15H_BDVER4},
32357 {"btver2", M_AMD_BTVER2},
32360 static struct _isa_names_table
32362 const char *const name;
32363 const enum processor_features feature;
32365 const isa_names_table[] =
32367 {"cmov", F_CMOV},
32368 {"mmx", F_MMX},
32369 {"popcnt", F_POPCNT},
32370 {"sse", F_SSE},
32371 {"sse2", F_SSE2},
32372 {"sse3", F_SSE3},
32373 {"ssse3", F_SSSE3},
32374 {"sse4a", F_SSE4_A},
32375 {"sse4.1", F_SSE4_1},
32376 {"sse4.2", F_SSE4_2},
32377 {"avx", F_AVX},
32378 {"fma4", F_FMA4},
32379 {"xop", F_XOP},
32380 {"fma", F_FMA},
32381 {"avx2", F_AVX2}
32384 tree __processor_model_type = build_processor_model_struct ();
32385 tree __cpu_model_var = make_var_decl (__processor_model_type,
32386 "__cpu_model");
32389 varpool_add_new_variable (__cpu_model_var);
32391 gcc_assert ((args != NULL) && (*args != NULL));
32393 param_string_cst = *args;
32394 while (param_string_cst
32395 && TREE_CODE (param_string_cst) != STRING_CST)
32397 /* *args must be an expr that can contain other EXPRS leading to a
32398 STRING_CST. */
32399 if (!EXPR_P (param_string_cst))
32401 error ("Parameter to builtin must be a string constant or literal");
32402 return integer_zero_node;
32404 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32407 gcc_assert (param_string_cst);
32409 if (fn_code == IX86_BUILTIN_CPU_IS)
32411 tree ref;
32412 tree field;
32413 tree final;
32415 unsigned int field_val = 0;
32416 unsigned int NUM_ARCH_NAMES
32417 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32419 for (i = 0; i < NUM_ARCH_NAMES; i++)
32420 if (strcmp (arch_names_table[i].name,
32421 TREE_STRING_POINTER (param_string_cst)) == 0)
32422 break;
32424 if (i == NUM_ARCH_NAMES)
32426 error ("Parameter to builtin not valid: %s",
32427 TREE_STRING_POINTER (param_string_cst));
32428 return integer_zero_node;
32431 field = TYPE_FIELDS (__processor_model_type);
32432 field_val = arch_names_table[i].model;
32434 /* CPU types are stored in the next field. */
32435 if (field_val > M_CPU_TYPE_START
32436 && field_val < M_CPU_SUBTYPE_START)
32438 field = DECL_CHAIN (field);
32439 field_val -= M_CPU_TYPE_START;
32442 /* CPU subtypes are stored in the field after the CPU type. */
32443 if (field_val > M_CPU_SUBTYPE_START)
32445 field = DECL_CHAIN (DECL_CHAIN (field));
32446 field_val -= M_CPU_SUBTYPE_START;
32449 /* Get the appropriate field in __cpu_model. */
32450 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32451 field, NULL_TREE);
32453 /* Check the value. */
32454 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32455 build_int_cstu (unsigned_type_node, field_val));
32456 return build1 (CONVERT_EXPR, integer_type_node, final);
32458 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32460 tree ref;
32461 tree array_elt;
32462 tree field;
32463 tree final;
32465 unsigned int field_val = 0;
32466 unsigned int NUM_ISA_NAMES
32467 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32469 for (i = 0; i < NUM_ISA_NAMES; i++)
32470 if (strcmp (isa_names_table[i].name,
32471 TREE_STRING_POINTER (param_string_cst)) == 0)
32472 break;
32474 if (i == NUM_ISA_NAMES)
32476 error ("Parameter to builtin not valid: %s",
32477 TREE_STRING_POINTER (param_string_cst));
32478 return integer_zero_node;
32481 field = TYPE_FIELDS (__processor_model_type);
32482 /* Get the last field, which is __cpu_features. */
32483 while (DECL_CHAIN (field))
32484 field = DECL_CHAIN (field);
32486 /* Get the appropriate field: __cpu_model.__cpu_features */
32487 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32488 field, NULL_TREE);
32490 /* Access the 0th element of __cpu_features array. */
32491 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32492 integer_zero_node, NULL_TREE, NULL_TREE);
32494 field_val = (1 << isa_names_table[i].feature);
32495 /* Return __cpu_model.__cpu_features[0] & field_val */
32496 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32497 build_int_cstu (unsigned_type_node, field_val));
32498 return build1 (CONVERT_EXPR, integer_type_node, final);
32500 gcc_unreachable ();
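/* Examples of the folding performed above (illustrative; the enum values are
   the ones defined earlier in this function):

     __builtin_cpu_is ("intel")
       -> (int) (__cpu_model.__cpu_vendor == M_INTEL)
     __builtin_cpu_is ("nehalem")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */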
32503 static tree
32504 ix86_fold_builtin (tree fndecl, int n_args,
32505 tree *args, bool ignore ATTRIBUTE_UNUSED)
32507 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32509 enum ix86_builtins fn_code = (enum ix86_builtins)
32510 DECL_FUNCTION_CODE (fndecl);
32511 if (fn_code == IX86_BUILTIN_CPU_IS
32512 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32514 gcc_assert (n_args == 1);
32515 return fold_builtin_cpu (fndecl, args);
32519 #ifdef SUBTARGET_FOLD_BUILTIN
32520 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32521 #endif
32523 return NULL_TREE;
32526 /* Make builtins to detect cpu type and features supported. NAME is
32527 the builtin name, CODE is the builtin code, and FTYPE is the function
32528 type of the builtin. */
32530 static void
32531 make_cpu_type_builtin (const char* name, int code,
32532 enum ix86_builtin_func_type ftype, bool is_const)
32534 tree decl;
32535 tree type;
32537 type = ix86_get_builtin_func_type (ftype);
32538 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32539 NULL, NULL_TREE);
32540 gcc_assert (decl != NULL_TREE);
32541 ix86_builtins[(int) code] = decl;
32542 TREE_READONLY (decl) = is_const;
32545 /* Make builtins to get CPU type and features supported. The created
32546 builtins are :
32548 __builtin_cpu_init (), to detect cpu type and features,
32549 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32550 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32553 static void
32554 ix86_init_platform_type_builtins (void)
32556 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32557 INT_FTYPE_VOID, false);
32558 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32559 INT_FTYPE_PCCHAR, true);
32560 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32561 INT_FTYPE_PCCHAR, true);
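/* A typical use of these builtins in user code looks like the following
   sketch; do_avx2 and do_generic are hypothetical helpers, the builtin names
   are the ones registered above:

     int
     dispatch (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return do_avx2 ();
       return do_generic ();
     }  */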
32564 /* Internal method for ix86_init_builtins. */
32566 static void
32567 ix86_init_builtins_va_builtins_abi (void)
32569 tree ms_va_ref, sysv_va_ref;
32570 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32571 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32572 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32573 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32575 if (!TARGET_64BIT)
32576 return;
32577 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32578 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32579 ms_va_ref = build_reference_type (ms_va_list_type_node);
32580 sysv_va_ref =
32581 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32583 fnvoid_va_end_ms =
32584 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32585 fnvoid_va_start_ms =
32586 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32587 fnvoid_va_end_sysv =
32588 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32589 fnvoid_va_start_sysv =
32590 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32591 NULL_TREE);
32592 fnvoid_va_copy_ms =
32593 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32594 NULL_TREE);
32595 fnvoid_va_copy_sysv =
32596 build_function_type_list (void_type_node, sysv_va_ref,
32597 sysv_va_ref, NULL_TREE);
32599 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32600 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32601 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32602 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32603 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32604 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32605 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32606 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32607 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32608 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32609 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32610 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
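/* These builtins back va_list handling for the non-default ABI on x86-64.
   Illustrative user-level use (the attribute and type names are the
   documented GCC extensions):

     int __attribute__ ((ms_abi))
     first_vararg (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }  */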
32613 static void
32614 ix86_init_builtin_types (void)
32616 tree float128_type_node, float80_type_node;
32618 /* The __float80 type. */
32619 float80_type_node = long_double_type_node;
32620 if (TYPE_MODE (float80_type_node) != XFmode)
32622 /* The __float80 type. */
32623 float80_type_node = make_node (REAL_TYPE);
32625 TYPE_PRECISION (float80_type_node) = 80;
32626 layout_type (float80_type_node);
32628 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32630 /* The __float128 type. */
32631 float128_type_node = make_node (REAL_TYPE);
32632 TYPE_PRECISION (float128_type_node) = 128;
32633 layout_type (float128_type_node);
32634 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32636 /* This macro is built by i386-builtin-types.awk. */
32637 DEFINE_BUILTIN_PRIMITIVE_TYPES;
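/* Both type names registered above are visible to users: as an illustration,
   "__float80 x;" declares an XFmode extended-precision value and
   "__float128 q;" a TFmode quad-precision value.  */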
32640 static void
32641 ix86_init_builtins (void)
32643 tree t;
32645 ix86_init_builtin_types ();
32647 /* Builtins to get CPU type and features. */
32648 ix86_init_platform_type_builtins ();
32650 /* TFmode support builtins. */
32651 def_builtin_const (0, "__builtin_infq",
32652 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32653 def_builtin_const (0, "__builtin_huge_valq",
32654 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32656 /* We will expand them to a normal call if SSE isn't available since
32657 they are used by libgcc. */
32658 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32659 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32660 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32661 TREE_READONLY (t) = 1;
32662 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32664 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32665 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32666 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32667 TREE_READONLY (t) = 1;
32668 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32670 ix86_init_tm_builtins ();
32671 ix86_init_mmx_sse_builtins ();
32673 if (TARGET_LP64)
32674 ix86_init_builtins_va_builtins_abi ();
32676 #ifdef SUBTARGET_INIT_BUILTINS
32677 SUBTARGET_INIT_BUILTINS;
32678 #endif
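/* Illustration of the TFmode builtins registered above (user code):

     __float128
     magnitude (__float128 x, __float128 sign)
     {
       return __builtin_copysignq (__builtin_fabsq (x), sign);
     }

   When SSE is unavailable these expand to calls to __fabstf2 and
   __copysigntf3 in libgcc, per the library names given above.  */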
32681 /* Return the ix86 builtin for CODE. */
32683 static tree
32684 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32686 if (code >= IX86_BUILTIN_MAX)
32687 return error_mark_node;
32689 return ix86_builtins[code];
32692 /* Errors in the source file can cause expand_expr to return const0_rtx
32693 where we expect a vector. To avoid crashing, use one of the vector
32694 clear instructions. */
32695 static rtx
32696 safe_vector_operand (rtx x, enum machine_mode mode)
32698 if (x == const0_rtx)
32699 x = CONST0_RTX (mode);
32700 return x;
32703 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32705 static rtx
32706 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32708 rtx pat;
32709 tree arg0 = CALL_EXPR_ARG (exp, 0);
32710 tree arg1 = CALL_EXPR_ARG (exp, 1);
32711 rtx op0 = expand_normal (arg0);
32712 rtx op1 = expand_normal (arg1);
32713 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32714 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32715 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32717 if (VECTOR_MODE_P (mode0))
32718 op0 = safe_vector_operand (op0, mode0);
32719 if (VECTOR_MODE_P (mode1))
32720 op1 = safe_vector_operand (op1, mode1);
32722 if (optimize || !target
32723 || GET_MODE (target) != tmode
32724 || !insn_data[icode].operand[0].predicate (target, tmode))
32725 target = gen_reg_rtx (tmode);
32727 if (GET_MODE (op1) == SImode && mode1 == TImode)
32729 rtx x = gen_reg_rtx (V4SImode);
32730 emit_insn (gen_sse2_loadd (x, op1));
32731 op1 = gen_lowpart (TImode, x);
32734 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32735 op0 = copy_to_mode_reg (mode0, op0);
32736 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32737 op1 = copy_to_mode_reg (mode1, op1);
32739 pat = GEN_FCN (icode) (target, op0, op1);
32740 if (! pat)
32741 return 0;
32743 emit_insn (pat);
32745 return target;
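/* Illustrative example (an assumption about one concrete builtin): a call
   such as __builtin_ia32_paddw128 (a, b) reaches this helper with
   ICODE = CODE_FOR_addv8hi3 and is emitted as that single insn, with the
   operands copied into registers only where the predicates require it.  */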
32748 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32750 static rtx
32751 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32752 enum ix86_builtin_func_type m_type,
32753 enum rtx_code sub_code)
32755 rtx pat;
32756 int i;
32757 int nargs;
32758 bool comparison_p = false;
32759 bool tf_p = false;
32760 bool last_arg_constant = false;
32761 int num_memory = 0;
32762 struct {
32763 rtx op;
32764 enum machine_mode mode;
32765 } args[4];
32767 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32769 switch (m_type)
32771 case MULTI_ARG_4_DF2_DI_I:
32772 case MULTI_ARG_4_DF2_DI_I1:
32773 case MULTI_ARG_4_SF2_SI_I:
32774 case MULTI_ARG_4_SF2_SI_I1:
32775 nargs = 4;
32776 last_arg_constant = true;
32777 break;
32779 case MULTI_ARG_3_SF:
32780 case MULTI_ARG_3_DF:
32781 case MULTI_ARG_3_SF2:
32782 case MULTI_ARG_3_DF2:
32783 case MULTI_ARG_3_DI:
32784 case MULTI_ARG_3_SI:
32785 case MULTI_ARG_3_SI_DI:
32786 case MULTI_ARG_3_HI:
32787 case MULTI_ARG_3_HI_SI:
32788 case MULTI_ARG_3_QI:
32789 case MULTI_ARG_3_DI2:
32790 case MULTI_ARG_3_SI2:
32791 case MULTI_ARG_3_HI2:
32792 case MULTI_ARG_3_QI2:
32793 nargs = 3;
32794 break;
32796 case MULTI_ARG_2_SF:
32797 case MULTI_ARG_2_DF:
32798 case MULTI_ARG_2_DI:
32799 case MULTI_ARG_2_SI:
32800 case MULTI_ARG_2_HI:
32801 case MULTI_ARG_2_QI:
32802 nargs = 2;
32803 break;
32805 case MULTI_ARG_2_DI_IMM:
32806 case MULTI_ARG_2_SI_IMM:
32807 case MULTI_ARG_2_HI_IMM:
32808 case MULTI_ARG_2_QI_IMM:
32809 nargs = 2;
32810 last_arg_constant = true;
32811 break;
32813 case MULTI_ARG_1_SF:
32814 case MULTI_ARG_1_DF:
32815 case MULTI_ARG_1_SF2:
32816 case MULTI_ARG_1_DF2:
32817 case MULTI_ARG_1_DI:
32818 case MULTI_ARG_1_SI:
32819 case MULTI_ARG_1_HI:
32820 case MULTI_ARG_1_QI:
32821 case MULTI_ARG_1_SI_DI:
32822 case MULTI_ARG_1_HI_DI:
32823 case MULTI_ARG_1_HI_SI:
32824 case MULTI_ARG_1_QI_DI:
32825 case MULTI_ARG_1_QI_SI:
32826 case MULTI_ARG_1_QI_HI:
32827 nargs = 1;
32828 break;
32830 case MULTI_ARG_2_DI_CMP:
32831 case MULTI_ARG_2_SI_CMP:
32832 case MULTI_ARG_2_HI_CMP:
32833 case MULTI_ARG_2_QI_CMP:
32834 nargs = 2;
32835 comparison_p = true;
32836 break;
32838 case MULTI_ARG_2_SF_TF:
32839 case MULTI_ARG_2_DF_TF:
32840 case MULTI_ARG_2_DI_TF:
32841 case MULTI_ARG_2_SI_TF:
32842 case MULTI_ARG_2_HI_TF:
32843 case MULTI_ARG_2_QI_TF:
32844 nargs = 2;
32845 tf_p = true;
32846 break;
32848 default:
32849 gcc_unreachable ();
32852 if (optimize || !target
32853 || GET_MODE (target) != tmode
32854 || !insn_data[icode].operand[0].predicate (target, tmode))
32855 target = gen_reg_rtx (tmode);
32857 gcc_assert (nargs <= 4);
32859 for (i = 0; i < nargs; i++)
32861 tree arg = CALL_EXPR_ARG (exp, i);
32862 rtx op = expand_normal (arg);
32863 int adjust = (comparison_p) ? 1 : 0;
32864 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32866 if (last_arg_constant && i == nargs - 1)
32868 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32870 enum insn_code new_icode = icode;
32871 switch (icode)
32873 case CODE_FOR_xop_vpermil2v2df3:
32874 case CODE_FOR_xop_vpermil2v4sf3:
32875 case CODE_FOR_xop_vpermil2v4df3:
32876 case CODE_FOR_xop_vpermil2v8sf3:
32877 error ("the last argument must be a 2-bit immediate");
32878 return gen_reg_rtx (tmode);
32879 case CODE_FOR_xop_rotlv2di3:
32880 new_icode = CODE_FOR_rotlv2di3;
32881 goto xop_rotl;
32882 case CODE_FOR_xop_rotlv4si3:
32883 new_icode = CODE_FOR_rotlv4si3;
32884 goto xop_rotl;
32885 case CODE_FOR_xop_rotlv8hi3:
32886 new_icode = CODE_FOR_rotlv8hi3;
32887 goto xop_rotl;
32888 case CODE_FOR_xop_rotlv16qi3:
32889 new_icode = CODE_FOR_rotlv16qi3;
32890 xop_rotl:
32891 if (CONST_INT_P (op))
32893 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32894 op = GEN_INT (INTVAL (op) & mask);
32895 gcc_checking_assert
32896 (insn_data[icode].operand[i + 1].predicate (op, mode));
32898 else
32900 gcc_checking_assert
32901 (nargs == 2
32902 && insn_data[new_icode].operand[0].mode == tmode
32903 && insn_data[new_icode].operand[1].mode == tmode
32904 && insn_data[new_icode].operand[2].mode == mode
32905 && insn_data[new_icode].operand[0].predicate
32906 == insn_data[icode].operand[0].predicate
32907 && insn_data[new_icode].operand[1].predicate
32908 == insn_data[icode].operand[1].predicate);
32909 icode = new_icode;
32910 goto non_constant;
32912 break;
32913 default:
32914 gcc_unreachable ();
32918 else
32920 non_constant:
32921 if (VECTOR_MODE_P (mode))
32922 op = safe_vector_operand (op, mode);
32924 /* If we aren't optimizing, only allow one memory operand to be
32925 generated. */
32926 if (memory_operand (op, mode))
32927 num_memory++;
32929 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32931 if (optimize
32932 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32933 || num_memory > 1)
32934 op = force_reg (mode, op);
32937 args[i].op = op;
32938 args[i].mode = mode;
32941 switch (nargs)
32943 case 1:
32944 pat = GEN_FCN (icode) (target, args[0].op);
32945 break;
32947 case 2:
32948 if (tf_p)
32949 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32950 GEN_INT ((int)sub_code));
32951 else if (! comparison_p)
32952 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32953 else
32955 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32956 args[0].op,
32957 args[1].op);
32959 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32961 break;
32963 case 3:
32964 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32965 break;
32967 case 4:
32968 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32969 break;
32971 default:
32972 gcc_unreachable ();
32975 if (! pat)
32976 return 0;
32978 emit_insn (pat);
32979 return target;
32982 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32983 insns with vec_merge. */
32985 static rtx
32986 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32987 rtx target)
32989 rtx pat;
32990 tree arg0 = CALL_EXPR_ARG (exp, 0);
32991 rtx op1, op0 = expand_normal (arg0);
32992 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32993 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32995 if (optimize || !target
32996 || GET_MODE (target) != tmode
32997 || !insn_data[icode].operand[0].predicate (target, tmode))
32998 target = gen_reg_rtx (tmode);
33000 if (VECTOR_MODE_P (mode0))
33001 op0 = safe_vector_operand (op0, mode0);
33003 if ((optimize && !register_operand (op0, mode0))
33004 || !insn_data[icode].operand[1].predicate (op0, mode0))
33005 op0 = copy_to_mode_reg (mode0, op0);
33007 op1 = op0;
33008 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33009 op1 = copy_to_mode_reg (mode0, op1);
33011 pat = GEN_FCN (icode) (target, op0, op1);
33012 if (! pat)
33013 return 0;
33014 emit_insn (pat);
33015 return target;
33018 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33020 static rtx
33021 ix86_expand_sse_compare (const struct builtin_description *d,
33022 tree exp, rtx target, bool swap)
33024 rtx pat;
33025 tree arg0 = CALL_EXPR_ARG (exp, 0);
33026 tree arg1 = CALL_EXPR_ARG (exp, 1);
33027 rtx op0 = expand_normal (arg0);
33028 rtx op1 = expand_normal (arg1);
33029 rtx op2;
33030 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33031 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33032 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33033 enum rtx_code comparison = d->comparison;
33035 if (VECTOR_MODE_P (mode0))
33036 op0 = safe_vector_operand (op0, mode0);
33037 if (VECTOR_MODE_P (mode1))
33038 op1 = safe_vector_operand (op1, mode1);
33040 /* Swap operands if we have a comparison that isn't available in
33041 hardware. */
33042 if (swap)
33044 rtx tmp = gen_reg_rtx (mode1);
33045 emit_move_insn (tmp, op1);
33046 op1 = op0;
33047 op0 = tmp;
33050 if (optimize || !target
33051 || GET_MODE (target) != tmode
33052 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33053 target = gen_reg_rtx (tmode);
33055 if ((optimize && !register_operand (op0, mode0))
33056 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33057 op0 = copy_to_mode_reg (mode0, op0);
33058 if ((optimize && !register_operand (op1, mode1))
33059 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33060 op1 = copy_to_mode_reg (mode1, op1);
33062 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33063 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33064 if (! pat)
33065 return 0;
33066 emit_insn (pat);
33067 return target;
33070 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33072 static rtx
33073 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33074 rtx target)
33076 rtx pat;
33077 tree arg0 = CALL_EXPR_ARG (exp, 0);
33078 tree arg1 = CALL_EXPR_ARG (exp, 1);
33079 rtx op0 = expand_normal (arg0);
33080 rtx op1 = expand_normal (arg1);
33081 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33082 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33083 enum rtx_code comparison = d->comparison;
33085 if (VECTOR_MODE_P (mode0))
33086 op0 = safe_vector_operand (op0, mode0);
33087 if (VECTOR_MODE_P (mode1))
33088 op1 = safe_vector_operand (op1, mode1);
33090 /* Swap operands if we have a comparison that isn't available in
33091 hardware. */
33092 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33094 rtx tmp = op1;
33095 op1 = op0;
33096 op0 = tmp;
33099 target = gen_reg_rtx (SImode);
33100 emit_move_insn (target, const0_rtx);
33101 target = gen_rtx_SUBREG (QImode, target, 0);
33103 if ((optimize && !register_operand (op0, mode0))
33104 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33105 op0 = copy_to_mode_reg (mode0, op0);
33106 if ((optimize && !register_operand (op1, mode1))
33107 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33108 op1 = copy_to_mode_reg (mode1, op1);
33110 pat = GEN_FCN (d->icode) (op0, op1);
33111 if (! pat)
33112 return 0;
33113 emit_insn (pat);
33114 emit_insn (gen_rtx_SET (VOIDmode,
33115 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33116 gen_rtx_fmt_ee (comparison, QImode,
33117 SET_DEST (pat),
33118 const0_rtx)));
33120 return SUBREG_REG (target);
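/* Illustration (assumed concrete case): __builtin_ia32_comisdeq (a, b) goes
   through this helper; the comisd pattern sets the flags register, and the
   SET emitted above extracts the comparison result into the low byte of a
   fresh SImode pseudo, which is what the builtin returns as an int.  */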
33123 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33125 static rtx
33126 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33127 rtx target)
33129 rtx pat;
33130 tree arg0 = CALL_EXPR_ARG (exp, 0);
33131 rtx op1, op0 = expand_normal (arg0);
33132 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33133 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33135 if (optimize || target == 0
33136 || GET_MODE (target) != tmode
33137 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33138 target = gen_reg_rtx (tmode);
33140 if (VECTOR_MODE_P (mode0))
33141 op0 = safe_vector_operand (op0, mode0);
33143 if ((optimize && !register_operand (op0, mode0))
33144 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33145 op0 = copy_to_mode_reg (mode0, op0);
33147 op1 = GEN_INT (d->comparison);
33149 pat = GEN_FCN (d->icode) (target, op0, op1);
33150 if (! pat)
33151 return 0;
33152 emit_insn (pat);
33153 return target;
33156 static rtx
33157 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33158 tree exp, rtx target)
33160 rtx pat;
33161 tree arg0 = CALL_EXPR_ARG (exp, 0);
33162 tree arg1 = CALL_EXPR_ARG (exp, 1);
33163 rtx op0 = expand_normal (arg0);
33164 rtx op1 = expand_normal (arg1);
33165 rtx op2;
33166 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33167 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33168 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33170 if (optimize || target == 0
33171 || GET_MODE (target) != tmode
33172 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33173 target = gen_reg_rtx (tmode);
33175 op0 = safe_vector_operand (op0, mode0);
33176 op1 = safe_vector_operand (op1, mode1);
33178 if ((optimize && !register_operand (op0, mode0))
33179 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33180 op0 = copy_to_mode_reg (mode0, op0);
33181 if ((optimize && !register_operand (op1, mode1))
33182 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33183 op1 = copy_to_mode_reg (mode1, op1);
33185 op2 = GEN_INT (d->comparison);
33187 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33188 if (! pat)
33189 return 0;
33190 emit_insn (pat);
33191 return target;
33194 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33196 static rtx
33197 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33198 rtx target)
33200 rtx pat;
33201 tree arg0 = CALL_EXPR_ARG (exp, 0);
33202 tree arg1 = CALL_EXPR_ARG (exp, 1);
33203 rtx op0 = expand_normal (arg0);
33204 rtx op1 = expand_normal (arg1);
33205 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33206 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33207 enum rtx_code comparison = d->comparison;
33209 if (VECTOR_MODE_P (mode0))
33210 op0 = safe_vector_operand (op0, mode0);
33211 if (VECTOR_MODE_P (mode1))
33212 op1 = safe_vector_operand (op1, mode1);
33214 target = gen_reg_rtx (SImode);
33215 emit_move_insn (target, const0_rtx);
33216 target = gen_rtx_SUBREG (QImode, target, 0);
33218 if ((optimize && !register_operand (op0, mode0))
33219 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33220 op0 = copy_to_mode_reg (mode0, op0);
33221 if ((optimize && !register_operand (op1, mode1))
33222 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33223 op1 = copy_to_mode_reg (mode1, op1);
33225 pat = GEN_FCN (d->icode) (op0, op1);
33226 if (! pat)
33227 return 0;
33228 emit_insn (pat);
33229 emit_insn (gen_rtx_SET (VOIDmode,
33230 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33231 gen_rtx_fmt_ee (comparison, QImode,
33232 SET_DEST (pat),
33233 const0_rtx)));
33235 return SUBREG_REG (target);
33238 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33240 static rtx
33241 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33242 tree exp, rtx target)
33244 rtx pat;
33245 tree arg0 = CALL_EXPR_ARG (exp, 0);
33246 tree arg1 = CALL_EXPR_ARG (exp, 1);
33247 tree arg2 = CALL_EXPR_ARG (exp, 2);
33248 tree arg3 = CALL_EXPR_ARG (exp, 3);
33249 tree arg4 = CALL_EXPR_ARG (exp, 4);
33250 rtx scratch0, scratch1;
33251 rtx op0 = expand_normal (arg0);
33252 rtx op1 = expand_normal (arg1);
33253 rtx op2 = expand_normal (arg2);
33254 rtx op3 = expand_normal (arg3);
33255 rtx op4 = expand_normal (arg4);
33256 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33258 tmode0 = insn_data[d->icode].operand[0].mode;
33259 tmode1 = insn_data[d->icode].operand[1].mode;
33260 modev2 = insn_data[d->icode].operand[2].mode;
33261 modei3 = insn_data[d->icode].operand[3].mode;
33262 modev4 = insn_data[d->icode].operand[4].mode;
33263 modei5 = insn_data[d->icode].operand[5].mode;
33264 modeimm = insn_data[d->icode].operand[6].mode;
33266 if (VECTOR_MODE_P (modev2))
33267 op0 = safe_vector_operand (op0, modev2);
33268 if (VECTOR_MODE_P (modev4))
33269 op2 = safe_vector_operand (op2, modev4);
33271 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33272 op0 = copy_to_mode_reg (modev2, op0);
33273 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33274 op1 = copy_to_mode_reg (modei3, op1);
33275 if ((optimize && !register_operand (op2, modev4))
33276 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33277 op2 = copy_to_mode_reg (modev4, op2);
33278 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33279 op3 = copy_to_mode_reg (modei5, op3);
33281 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33283 error ("the fifth argument must be an 8-bit immediate");
33284 return const0_rtx;
33287 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33289 if (optimize || !target
33290 || GET_MODE (target) != tmode0
33291 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33292 target = gen_reg_rtx (tmode0);
33294 scratch1 = gen_reg_rtx (tmode1);
33296 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33298 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33300 if (optimize || !target
33301 || GET_MODE (target) != tmode1
33302 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33303 target = gen_reg_rtx (tmode1);
33305 scratch0 = gen_reg_rtx (tmode0);
33307 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33309 else
33311 gcc_assert (d->flag);
33313 scratch0 = gen_reg_rtx (tmode0);
33314 scratch1 = gen_reg_rtx (tmode1);
33316 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33319 if (! pat)
33320 return 0;
33322 emit_insn (pat);
33324 if (d->flag)
33326 target = gen_reg_rtx (SImode);
33327 emit_move_insn (target, const0_rtx);
33328 target = gen_rtx_SUBREG (QImode, target, 0);
33330 emit_insn
33331 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33332 gen_rtx_fmt_ee (EQ, QImode,
33333 gen_rtx_REG ((enum machine_mode) d->flag,
33334 FLAGS_REG),
33335 const0_rtx)));
33336 return SUBREG_REG (target);
33338 else
33339 return target;
33343 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33345 static rtx
33346 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33347 tree exp, rtx target)
33349 rtx pat;
33350 tree arg0 = CALL_EXPR_ARG (exp, 0);
33351 tree arg1 = CALL_EXPR_ARG (exp, 1);
33352 tree arg2 = CALL_EXPR_ARG (exp, 2);
33353 rtx scratch0, scratch1;
33354 rtx op0 = expand_normal (arg0);
33355 rtx op1 = expand_normal (arg1);
33356 rtx op2 = expand_normal (arg2);
33357 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33359 tmode0 = insn_data[d->icode].operand[0].mode;
33360 tmode1 = insn_data[d->icode].operand[1].mode;
33361 modev2 = insn_data[d->icode].operand[2].mode;
33362 modev3 = insn_data[d->icode].operand[3].mode;
33363 modeimm = insn_data[d->icode].operand[4].mode;
33365 if (VECTOR_MODE_P (modev2))
33366 op0 = safe_vector_operand (op0, modev2);
33367 if (VECTOR_MODE_P (modev3))
33368 op1 = safe_vector_operand (op1, modev3);
33370 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33371 op0 = copy_to_mode_reg (modev2, op0);
33372 if ((optimize && !register_operand (op1, modev3))
33373 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33374 op1 = copy_to_mode_reg (modev3, op1);
33376 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33378 error ("the third argument must be an 8-bit immediate");
33379 return const0_rtx;
33382 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33384 if (optimize || !target
33385 || GET_MODE (target) != tmode0
33386 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33387 target = gen_reg_rtx (tmode0);
33389 scratch1 = gen_reg_rtx (tmode1);
33391 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33393 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33395 if (optimize || !target
33396 || GET_MODE (target) != tmode1
33397 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33398 target = gen_reg_rtx (tmode1);
33400 scratch0 = gen_reg_rtx (tmode0);
33402 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33404 else
33406 gcc_assert (d->flag);
33408 scratch0 = gen_reg_rtx (tmode0);
33409 scratch1 = gen_reg_rtx (tmode1);
33411 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33414 if (! pat)
33415 return 0;
33417 emit_insn (pat);
33419 if (d->flag)
33421 target = gen_reg_rtx (SImode);
33422 emit_move_insn (target, const0_rtx);
33423 target = gen_rtx_SUBREG (QImode, target, 0);
33425 emit_insn
33426 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33427 gen_rtx_fmt_ee (EQ, QImode,
33428 gen_rtx_REG ((enum machine_mode) d->flag,
33429 FLAGS_REG),
33430 const0_rtx)));
33431 return SUBREG_REG (target);
33433 else
33434 return target;
33437 /* Subroutine of ix86_expand_builtin to take care of insns with
33438 variable number of operands. */
33440 static rtx
33441 ix86_expand_args_builtin (const struct builtin_description *d,
33442 tree exp, rtx target)
33444 rtx pat, real_target;
33445 unsigned int i, nargs;
33446 unsigned int nargs_constant = 0;
33447 unsigned int mask_pos = 0;
33448 int num_memory = 0;
33449 struct
33451 rtx op;
33452 enum machine_mode mode;
33453 } args[6];
33454 bool last_arg_count = false;
33455 enum insn_code icode = d->icode;
33456 const struct insn_data_d *insn_p = &insn_data[icode];
33457 enum machine_mode tmode = insn_p->operand[0].mode;
33458 enum machine_mode rmode = VOIDmode;
33459 bool swap = false;
33460 enum rtx_code comparison = d->comparison;
33462 switch ((enum ix86_builtin_func_type) d->flag)
33464 case V2DF_FTYPE_V2DF_ROUND:
33465 case V4DF_FTYPE_V4DF_ROUND:
33466 case V4SF_FTYPE_V4SF_ROUND:
33467 case V8SF_FTYPE_V8SF_ROUND:
33468 case V4SI_FTYPE_V4SF_ROUND:
33469 case V8SI_FTYPE_V8SF_ROUND:
33470 return ix86_expand_sse_round (d, exp, target);
33471 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33472 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33473 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33474 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33475 case INT_FTYPE_V8SF_V8SF_PTEST:
33476 case INT_FTYPE_V4DI_V4DI_PTEST:
33477 case INT_FTYPE_V4DF_V4DF_PTEST:
33478 case INT_FTYPE_V4SF_V4SF_PTEST:
33479 case INT_FTYPE_V2DI_V2DI_PTEST:
33480 case INT_FTYPE_V2DF_V2DF_PTEST:
33481 return ix86_expand_sse_ptest (d, exp, target);
33482 case FLOAT128_FTYPE_FLOAT128:
33483 case FLOAT_FTYPE_FLOAT:
33484 case INT_FTYPE_INT:
33485 case UINT64_FTYPE_INT:
33486 case UINT16_FTYPE_UINT16:
33487 case INT64_FTYPE_INT64:
33488 case INT64_FTYPE_V4SF:
33489 case INT64_FTYPE_V2DF:
33490 case INT_FTYPE_V16QI:
33491 case INT_FTYPE_V8QI:
33492 case INT_FTYPE_V8SF:
33493 case INT_FTYPE_V4DF:
33494 case INT_FTYPE_V4SF:
33495 case INT_FTYPE_V2DF:
33496 case INT_FTYPE_V32QI:
33497 case V16QI_FTYPE_V16QI:
33498 case V8SI_FTYPE_V8SF:
33499 case V8SI_FTYPE_V4SI:
33500 case V8HI_FTYPE_V8HI:
33501 case V8HI_FTYPE_V16QI:
33502 case V8QI_FTYPE_V8QI:
33503 case V8SF_FTYPE_V8SF:
33504 case V8SF_FTYPE_V8SI:
33505 case V8SF_FTYPE_V4SF:
33506 case V8SF_FTYPE_V8HI:
33507 case V4SI_FTYPE_V4SI:
33508 case V4SI_FTYPE_V16QI:
33509 case V4SI_FTYPE_V4SF:
33510 case V4SI_FTYPE_V8SI:
33511 case V4SI_FTYPE_V8HI:
33512 case V4SI_FTYPE_V4DF:
33513 case V4SI_FTYPE_V2DF:
33514 case V4HI_FTYPE_V4HI:
33515 case V4DF_FTYPE_V4DF:
33516 case V4DF_FTYPE_V4SI:
33517 case V4DF_FTYPE_V4SF:
33518 case V4DF_FTYPE_V2DF:
33519 case V4SF_FTYPE_V4SF:
33520 case V4SF_FTYPE_V4SI:
33521 case V4SF_FTYPE_V8SF:
33522 case V4SF_FTYPE_V4DF:
33523 case V4SF_FTYPE_V8HI:
33524 case V4SF_FTYPE_V2DF:
33525 case V2DI_FTYPE_V2DI:
33526 case V2DI_FTYPE_V16QI:
33527 case V2DI_FTYPE_V8HI:
33528 case V2DI_FTYPE_V4SI:
33529 case V2DF_FTYPE_V2DF:
33530 case V2DF_FTYPE_V4SI:
33531 case V2DF_FTYPE_V4DF:
33532 case V2DF_FTYPE_V4SF:
33533 case V2DF_FTYPE_V2SI:
33534 case V2SI_FTYPE_V2SI:
33535 case V2SI_FTYPE_V4SF:
33536 case V2SI_FTYPE_V2SF:
33537 case V2SI_FTYPE_V2DF:
33538 case V2SF_FTYPE_V2SF:
33539 case V2SF_FTYPE_V2SI:
33540 case V32QI_FTYPE_V32QI:
33541 case V32QI_FTYPE_V16QI:
33542 case V16HI_FTYPE_V16HI:
33543 case V16HI_FTYPE_V8HI:
33544 case V8SI_FTYPE_V8SI:
33545 case V16HI_FTYPE_V16QI:
33546 case V8SI_FTYPE_V16QI:
33547 case V4DI_FTYPE_V16QI:
33548 case V8SI_FTYPE_V8HI:
33549 case V4DI_FTYPE_V8HI:
33550 case V4DI_FTYPE_V4SI:
33551 case V4DI_FTYPE_V2DI:
33552 case HI_FTYPE_HI:
33553 case UINT_FTYPE_V2DF:
33554 case UINT_FTYPE_V4SF:
33555 case UINT64_FTYPE_V2DF:
33556 case UINT64_FTYPE_V4SF:
33557 case V16QI_FTYPE_V8DI:
33558 case V16HI_FTYPE_V16SI:
33559 case V16SI_FTYPE_HI:
33560 case V16SI_FTYPE_V16SI:
33561 case V16SI_FTYPE_INT:
33562 case V16SF_FTYPE_FLOAT:
33563 case V16SF_FTYPE_V4SF:
33564 case V16SF_FTYPE_V16SF:
33565 case V8HI_FTYPE_V8DI:
33566 case V8UHI_FTYPE_V8UHI:
33567 case V8SI_FTYPE_V8DI:
33568 case V8USI_FTYPE_V8USI:
33569 case V8SF_FTYPE_V8DF:
33570 case V8DI_FTYPE_QI:
33571 case V8DI_FTYPE_INT64:
33572 case V8DI_FTYPE_V4DI:
33573 case V8DI_FTYPE_V8DI:
33574 case V8DF_FTYPE_DOUBLE:
33575 case V8DF_FTYPE_V4DF:
33576 case V8DF_FTYPE_V8DF:
33577 case V8DF_FTYPE_V8SI:
33578 nargs = 1;
33579 break;
33580 case V4SF_FTYPE_V4SF_VEC_MERGE:
33581 case V2DF_FTYPE_V2DF_VEC_MERGE:
33582 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33583 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33584 case V16QI_FTYPE_V16QI_V16QI:
33585 case V16QI_FTYPE_V8HI_V8HI:
33586 case V16SI_FTYPE_V16SI_V16SI:
33587 case V16SF_FTYPE_V16SF_V16SF:
33588 case V16SF_FTYPE_V16SF_V16SI:
33589 case V8QI_FTYPE_V8QI_V8QI:
33590 case V8QI_FTYPE_V4HI_V4HI:
33591 case V8HI_FTYPE_V8HI_V8HI:
33592 case V8HI_FTYPE_V16QI_V16QI:
33593 case V8HI_FTYPE_V4SI_V4SI:
33594 case V8SF_FTYPE_V8SF_V8SF:
33595 case V8SF_FTYPE_V8SF_V8SI:
33596 case V8DI_FTYPE_V8DI_V8DI:
33597 case V8DF_FTYPE_V8DF_V8DF:
33598 case V8DF_FTYPE_V8DF_V8DI:
33599 case V4SI_FTYPE_V4SI_V4SI:
33600 case V4SI_FTYPE_V8HI_V8HI:
33601 case V4SI_FTYPE_V4SF_V4SF:
33602 case V4SI_FTYPE_V2DF_V2DF:
33603 case V4HI_FTYPE_V4HI_V4HI:
33604 case V4HI_FTYPE_V8QI_V8QI:
33605 case V4HI_FTYPE_V2SI_V2SI:
33606 case V4DF_FTYPE_V4DF_V4DF:
33607 case V4DF_FTYPE_V4DF_V4DI:
33608 case V4SF_FTYPE_V4SF_V4SF:
33609 case V4SF_FTYPE_V4SF_V4SI:
33610 case V4SF_FTYPE_V4SF_V2SI:
33611 case V4SF_FTYPE_V4SF_V2DF:
33612 case V4SF_FTYPE_V4SF_UINT:
33613 case V4SF_FTYPE_V4SF_UINT64:
33614 case V4SF_FTYPE_V4SF_DI:
33615 case V4SF_FTYPE_V4SF_SI:
33616 case V2DI_FTYPE_V2DI_V2DI:
33617 case V2DI_FTYPE_V16QI_V16QI:
33618 case V2DI_FTYPE_V4SI_V4SI:
33619 case V2UDI_FTYPE_V4USI_V4USI:
33620 case V2DI_FTYPE_V2DI_V16QI:
33621 case V2DI_FTYPE_V2DF_V2DF:
33622 case V2SI_FTYPE_V2SI_V2SI:
33623 case V2SI_FTYPE_V4HI_V4HI:
33624 case V2SI_FTYPE_V2SF_V2SF:
33625 case V2DF_FTYPE_V2DF_V2DF:
33626 case V2DF_FTYPE_V2DF_V4SF:
33627 case V2DF_FTYPE_V2DF_V2DI:
33628 case V2DF_FTYPE_V2DF_DI:
33629 case V2DF_FTYPE_V2DF_SI:
33630 case V2DF_FTYPE_V2DF_UINT:
33631 case V2DF_FTYPE_V2DF_UINT64:
33632 case V2SF_FTYPE_V2SF_V2SF:
33633 case V1DI_FTYPE_V1DI_V1DI:
33634 case V1DI_FTYPE_V8QI_V8QI:
33635 case V1DI_FTYPE_V2SI_V2SI:
33636 case V32QI_FTYPE_V16HI_V16HI:
33637 case V16HI_FTYPE_V8SI_V8SI:
33638 case V32QI_FTYPE_V32QI_V32QI:
33639 case V16HI_FTYPE_V32QI_V32QI:
33640 case V16HI_FTYPE_V16HI_V16HI:
33641 case V8SI_FTYPE_V4DF_V4DF:
33642 case V8SI_FTYPE_V8SI_V8SI:
33643 case V8SI_FTYPE_V16HI_V16HI:
33644 case V4DI_FTYPE_V4DI_V4DI:
33645 case V4DI_FTYPE_V8SI_V8SI:
33646 case V4UDI_FTYPE_V8USI_V8USI:
33647 case QI_FTYPE_V8DI_V8DI:
33648 case HI_FTYPE_V16SI_V16SI:
33649 if (comparison == UNKNOWN)
33650 return ix86_expand_binop_builtin (icode, exp, target);
33651 nargs = 2;
33652 break;
33653 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33654 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33655 gcc_assert (comparison != UNKNOWN);
33656 nargs = 2;
33657 swap = true;
33658 break;
33659 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33660 case V16HI_FTYPE_V16HI_SI_COUNT:
33661 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33662 case V8SI_FTYPE_V8SI_SI_COUNT:
33663 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33664 case V4DI_FTYPE_V4DI_INT_COUNT:
33665 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33666 case V8HI_FTYPE_V8HI_SI_COUNT:
33667 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33668 case V4SI_FTYPE_V4SI_SI_COUNT:
33669 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33670 case V4HI_FTYPE_V4HI_SI_COUNT:
33671 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33672 case V2DI_FTYPE_V2DI_SI_COUNT:
33673 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33674 case V2SI_FTYPE_V2SI_SI_COUNT:
33675 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33676 case V1DI_FTYPE_V1DI_SI_COUNT:
33677 nargs = 2;
33678 last_arg_count = true;
33679 break;
33680 case UINT64_FTYPE_UINT64_UINT64:
33681 case UINT_FTYPE_UINT_UINT:
33682 case UINT_FTYPE_UINT_USHORT:
33683 case UINT_FTYPE_UINT_UCHAR:
33684 case UINT16_FTYPE_UINT16_INT:
33685 case UINT8_FTYPE_UINT8_INT:
33686 case HI_FTYPE_HI_HI:
33687 case V16SI_FTYPE_V8DF_V8DF:
33688 nargs = 2;
33689 break;
33690 case V2DI_FTYPE_V2DI_INT_CONVERT:
33691 nargs = 2;
33692 rmode = V1TImode;
33693 nargs_constant = 1;
33694 break;
33695 case V4DI_FTYPE_V4DI_INT_CONVERT:
33696 nargs = 2;
33697 rmode = V2TImode;
33698 nargs_constant = 1;
33699 break;
33700 case V8HI_FTYPE_V8HI_INT:
33701 case V8HI_FTYPE_V8SF_INT:
33702 case V16HI_FTYPE_V16SF_INT:
33703 case V8HI_FTYPE_V4SF_INT:
33704 case V8SF_FTYPE_V8SF_INT:
33705 case V4SF_FTYPE_V16SF_INT:
33706 case V16SF_FTYPE_V16SF_INT:
33707 case V4SI_FTYPE_V4SI_INT:
33708 case V4SI_FTYPE_V8SI_INT:
33709 case V4HI_FTYPE_V4HI_INT:
33710 case V4DF_FTYPE_V4DF_INT:
33711 case V4DF_FTYPE_V8DF_INT:
33712 case V4SF_FTYPE_V4SF_INT:
33713 case V4SF_FTYPE_V8SF_INT:
33714 case V2DI_FTYPE_V2DI_INT:
33715 case V2DF_FTYPE_V2DF_INT:
33716 case V2DF_FTYPE_V4DF_INT:
33717 case V16HI_FTYPE_V16HI_INT:
33718 case V8SI_FTYPE_V8SI_INT:
33719 case V16SI_FTYPE_V16SI_INT:
33720 case V4SI_FTYPE_V16SI_INT:
33721 case V4DI_FTYPE_V4DI_INT:
33722 case V2DI_FTYPE_V4DI_INT:
33723 case V4DI_FTYPE_V8DI_INT:
33724 case HI_FTYPE_HI_INT:
33725 nargs = 2;
33726 nargs_constant = 1;
33727 break;
33728 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33729 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33730 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33731 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33732 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33733 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33734 case HI_FTYPE_V16SI_V16SI_HI:
33735 case QI_FTYPE_V8DI_V8DI_QI:
33736 case V16HI_FTYPE_V16SI_V16HI_HI:
33737 case V16QI_FTYPE_V16SI_V16QI_HI:
33738 case V16QI_FTYPE_V8DI_V16QI_QI:
33739 case V16SF_FTYPE_V16SF_V16SF_HI:
33740 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33741 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33742 case V16SF_FTYPE_V16SI_V16SF_HI:
33743 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33744 case V16SF_FTYPE_V4SF_V16SF_HI:
33745 case V16SI_FTYPE_SI_V16SI_HI:
33746 case V16SI_FTYPE_V16HI_V16SI_HI:
33747 case V16SI_FTYPE_V16QI_V16SI_HI:
33748 case V16SI_FTYPE_V16SF_V16SI_HI:
33749 case V16SI_FTYPE_V16SI_V16SI_HI:
33750 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33751 case V16SI_FTYPE_V4SI_V16SI_HI:
33752 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33753 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33754 case V8DF_FTYPE_V2DF_V8DF_QI:
33755 case V8DF_FTYPE_V4DF_V8DF_QI:
33756 case V8DF_FTYPE_V8DF_V8DF_QI:
33757 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33758 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33759 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33760 case V8DF_FTYPE_V8SF_V8DF_QI:
33761 case V8DF_FTYPE_V8SI_V8DF_QI:
33762 case V8DI_FTYPE_DI_V8DI_QI:
33763 case V8DI_FTYPE_V16QI_V8DI_QI:
33764 case V8DI_FTYPE_V2DI_V8DI_QI:
33765 case V8DI_FTYPE_V4DI_V8DI_QI:
33766 case V8DI_FTYPE_V8DI_V8DI_QI:
33767 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33768 case V8DI_FTYPE_V8HI_V8DI_QI:
33769 case V8DI_FTYPE_V8SI_V8DI_QI:
33770 case V8HI_FTYPE_V8DI_V8HI_QI:
33771 case V8SF_FTYPE_V8DF_V8SF_QI:
33772 case V8SI_FTYPE_V8DF_V8SI_QI:
33773 case V8SI_FTYPE_V8DI_V8SI_QI:
33774 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33775 nargs = 3;
33776 break;
33777 case V32QI_FTYPE_V32QI_V32QI_INT:
33778 case V16HI_FTYPE_V16HI_V16HI_INT:
33779 case V16QI_FTYPE_V16QI_V16QI_INT:
33780 case V4DI_FTYPE_V4DI_V4DI_INT:
33781 case V8HI_FTYPE_V8HI_V8HI_INT:
33782 case V8SI_FTYPE_V8SI_V8SI_INT:
33783 case V8SI_FTYPE_V8SI_V4SI_INT:
33784 case V8SF_FTYPE_V8SF_V8SF_INT:
33785 case V8SF_FTYPE_V8SF_V4SF_INT:
33786 case V4SI_FTYPE_V4SI_V4SI_INT:
33787 case V4DF_FTYPE_V4DF_V4DF_INT:
33788 case V16SF_FTYPE_V16SF_V16SF_INT:
33789 case V16SF_FTYPE_V16SF_V4SF_INT:
33790 case V16SI_FTYPE_V16SI_V4SI_INT:
33791 case V4DF_FTYPE_V4DF_V2DF_INT:
33792 case V4SF_FTYPE_V4SF_V4SF_INT:
33793 case V2DI_FTYPE_V2DI_V2DI_INT:
33794 case V4DI_FTYPE_V4DI_V2DI_INT:
33795 case V2DF_FTYPE_V2DF_V2DF_INT:
33796 case QI_FTYPE_V8DI_V8DI_INT:
33797 case QI_FTYPE_V8DF_V8DF_INT:
33798 case QI_FTYPE_V2DF_V2DF_INT:
33799 case QI_FTYPE_V4SF_V4SF_INT:
33800 case HI_FTYPE_V16SI_V16SI_INT:
33801 case HI_FTYPE_V16SF_V16SF_INT:
33802 nargs = 3;
33803 nargs_constant = 1;
33804 break;
33805 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33806 nargs = 3;
33807 rmode = V4DImode;
33808 nargs_constant = 1;
33809 break;
33810 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33811 nargs = 3;
33812 rmode = V2DImode;
33813 nargs_constant = 1;
33814 break;
33815 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33816 nargs = 3;
33817 rmode = DImode;
33818 nargs_constant = 1;
33819 break;
33820 case V2DI_FTYPE_V2DI_UINT_UINT:
33821 nargs = 3;
33822 nargs_constant = 2;
33823 break;
33824 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33825 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33826 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33827 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33828 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33829 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33830 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33831 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33832 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33833 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33834 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33835 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33836 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33837 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33838 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33839 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33840 nargs = 4;
33841 break;
33842 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33843 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33844 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33845 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33846 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33847 nargs = 4;
33848 nargs_constant = 1;
33849 break;
33850 case QI_FTYPE_V2DF_V2DF_INT_QI:
33851 case QI_FTYPE_V4SF_V4SF_INT_QI:
33852 nargs = 4;
33853 mask_pos = 1;
33854 nargs_constant = 1;
33855 break;
33856 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33857 nargs = 4;
33858 nargs_constant = 2;
33859 break;
33860 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33861 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33862 nargs = 4;
33863 break;
33864 case QI_FTYPE_V8DI_V8DI_INT_QI:
33865 case HI_FTYPE_V16SI_V16SI_INT_HI:
33866 case QI_FTYPE_V8DF_V8DF_INT_QI:
33867 case HI_FTYPE_V16SF_V16SF_INT_HI:
33868 mask_pos = 1;
33869 nargs = 4;
33870 nargs_constant = 1;
33871 break;
33872 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33873 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33874 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33875 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33876 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33877 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33878 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33879 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33880 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33881 nargs = 4;
33882 mask_pos = 2;
33883 nargs_constant = 1;
33884 break;
33885 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33886 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33887 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33888 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33889 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33890 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33891 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33892 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33893 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33894 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33895 nargs = 5;
33896 mask_pos = 2;
33897 nargs_constant = 1;
33898 break;
33899 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33900 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33901 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33902 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33903 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33904 nargs = 5;
33905 mask_pos = 1;
33906 nargs_constant = 1;
33907 break;
33909 default:
33910 gcc_unreachable ();
33913 gcc_assert (nargs <= ARRAY_SIZE (args));
33915 if (comparison != UNKNOWN)
33917 gcc_assert (nargs == 2);
33918 return ix86_expand_sse_compare (d, exp, target, swap);
33921 if (rmode == VOIDmode || rmode == tmode)
33923 if (optimize
33924 || target == 0
33925 || GET_MODE (target) != tmode
33926 || !insn_p->operand[0].predicate (target, tmode))
33927 target = gen_reg_rtx (tmode);
33928 real_target = target;
33930 else
33932 real_target = gen_reg_rtx (tmode);
33933 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33936 for (i = 0; i < nargs; i++)
33938 tree arg = CALL_EXPR_ARG (exp, i);
33939 rtx op = expand_normal (arg);
33940 enum machine_mode mode = insn_p->operand[i + 1].mode;
33941 bool match = insn_p->operand[i + 1].predicate (op, mode);
33943 if (last_arg_count && (i + 1) == nargs)
33945 /* SIMD shift insns take either an 8-bit immediate or a
33946 register as count. But builtin functions take int as
33947 count. If the count doesn't match, we put it in a register. */
33948 if (!match)
33950 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33951 if (!insn_p->operand[i + 1].predicate (op, mode))
33952 op = copy_to_reg (op);
33955 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33956 (!mask_pos && (nargs - i) <= nargs_constant))
33958 if (!match)
33959 switch (icode)
33961 case CODE_FOR_avx2_inserti128:
33962 case CODE_FOR_avx2_extracti128:
33963 error ("the last argument must be a 1-bit immediate");
33964 return const0_rtx;
33966 case CODE_FOR_avx512f_cmpv8di3_mask:
33967 case CODE_FOR_avx512f_cmpv16si3_mask:
33968 case CODE_FOR_avx512f_ucmpv8di3_mask:
33969 case CODE_FOR_avx512f_ucmpv16si3_mask:
33970 error ("the last argument must be a 3-bit immediate");
33971 return const0_rtx;
33973 case CODE_FOR_sse4_1_roundsd:
33974 case CODE_FOR_sse4_1_roundss:
33976 case CODE_FOR_sse4_1_roundpd:
33977 case CODE_FOR_sse4_1_roundps:
33978 case CODE_FOR_avx_roundpd256:
33979 case CODE_FOR_avx_roundps256:
33981 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33982 case CODE_FOR_sse4_1_roundps_sfix:
33983 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33984 case CODE_FOR_avx_roundps_sfix256:
33986 case CODE_FOR_sse4_1_blendps:
33987 case CODE_FOR_avx_blendpd256:
33988 case CODE_FOR_avx_vpermilv4df:
33989 case CODE_FOR_avx512f_getmantv8df_mask:
33990 case CODE_FOR_avx512f_getmantv16sf_mask:
33991 error ("the last argument must be a 4-bit immediate");
33992 return const0_rtx;
33994 case CODE_FOR_sha1rnds4:
33995 case CODE_FOR_sse4_1_blendpd:
33996 case CODE_FOR_avx_vpermilv2df:
33997 case CODE_FOR_xop_vpermil2v2df3:
33998 case CODE_FOR_xop_vpermil2v4sf3:
33999 case CODE_FOR_xop_vpermil2v4df3:
34000 case CODE_FOR_xop_vpermil2v8sf3:
34001 case CODE_FOR_avx512f_vinsertf32x4_mask:
34002 case CODE_FOR_avx512f_vinserti32x4_mask:
34003 case CODE_FOR_avx512f_vextractf32x4_mask:
34004 case CODE_FOR_avx512f_vextracti32x4_mask:
34005 error ("the last argument must be a 2-bit immediate");
34006 return const0_rtx;
34008 case CODE_FOR_avx_vextractf128v4df:
34009 case CODE_FOR_avx_vextractf128v8sf:
34010 case CODE_FOR_avx_vextractf128v8si:
34011 case CODE_FOR_avx_vinsertf128v4df:
34012 case CODE_FOR_avx_vinsertf128v8sf:
34013 case CODE_FOR_avx_vinsertf128v8si:
34014 case CODE_FOR_avx512f_vinsertf64x4_mask:
34015 case CODE_FOR_avx512f_vinserti64x4_mask:
34016 case CODE_FOR_avx512f_vextractf64x4_mask:
34017 case CODE_FOR_avx512f_vextracti64x4_mask:
34018 error ("the last argument must be a 1-bit immediate");
34019 return const0_rtx;
34021 case CODE_FOR_avx_vmcmpv2df3:
34022 case CODE_FOR_avx_vmcmpv4sf3:
34023 case CODE_FOR_avx_cmpv2df3:
34024 case CODE_FOR_avx_cmpv4sf3:
34025 case CODE_FOR_avx_cmpv4df3:
34026 case CODE_FOR_avx_cmpv8sf3:
34027 case CODE_FOR_avx512f_cmpv8df3_mask:
34028 case CODE_FOR_avx512f_cmpv16sf3_mask:
34029 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34030 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34031 error ("the last argument must be a 5-bit immediate");
34032 return const0_rtx;
34034 default:
34035 switch (nargs_constant)
34037 case 2:
34038 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34039 (!mask_pos && (nargs - i) == nargs_constant))
34041 error ("the next to last argument must be an 8-bit immediate");
34042 break;
34044 case 1:
34045 error ("the last argument must be an 8-bit immediate");
34046 break;
34047 default:
34048 gcc_unreachable ();
34050 return const0_rtx;
34053 else
34055 if (VECTOR_MODE_P (mode))
34056 op = safe_vector_operand (op, mode);
34058 /* If we aren't optimizing, only allow one memory operand to
34059 be generated. */
34060 if (memory_operand (op, mode))
34061 num_memory++;
34063 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34065 if (optimize || !match || num_memory > 1)
34066 op = copy_to_mode_reg (mode, op);
34068 else
34070 op = copy_to_reg (op);
34071 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34075 args[i].op = op;
34076 args[i].mode = mode;
34079 switch (nargs)
34081 case 1:
34082 pat = GEN_FCN (icode) (real_target, args[0].op);
34083 break;
34084 case 2:
34085 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34086 break;
34087 case 3:
34088 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34089 args[2].op);
34090 break;
34091 case 4:
34092 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34093 args[2].op, args[3].op);
34094 break;
34095 case 5:
34096 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34097 args[2].op, args[3].op, args[4].op);
break;
34098 case 6:
34099 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34100 args[2].op, args[3].op, args[4].op,
34101 args[5].op);
34102 break;
34103 default:
34104 gcc_unreachable ();
34107 if (! pat)
34108 return 0;
34110 emit_insn (pat);
34111 return target;
34114 /* Transform a pattern of the following layout:
34115 (parallel [
34116 (set (A B))
34117 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34119 into:
34120 (set (A B))
34123 or a pattern of the form (parallel [ A B ...
34125 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34128 ... ]) into:
34129 (parallel [ A B ... ]) */
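/* As an illustration (editor's sketch, not from the original sources):
   (parallel [(set (reg:V8DF d) (plus:V8DF a b))
              (unspec [(const_int R)] UNSPEC_EMBEDDED_ROUNDING)])
   becomes plain (set (reg:V8DF d) (plus:V8DF a b)).  */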
34131 static rtx
34132 ix86_erase_embedded_rounding (rtx pat)
34134 if (GET_CODE (pat) == INSN)
34135 pat = PATTERN (pat);
34137 gcc_assert (GET_CODE (pat) == PARALLEL);
34139 if (XVECLEN (pat, 0) == 2)
34141 rtx p0 = XVECEXP (pat, 0, 0);
34142 rtx p1 = XVECEXP (pat, 0, 1);
34144 gcc_assert (GET_CODE (p0) == SET
34145 && GET_CODE (p1) == UNSPEC
34146 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34148 return p0;
34150 else
34152 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34153 int i = 0;
34154 int j = 0;
34156 for (; i < XVECLEN (pat, 0); ++i)
34158 rtx elem = XVECEXP (pat, 0, i);
34159 if (GET_CODE (elem) != UNSPEC
34160 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34161 res [j++] = elem;
34164 /* No more than 1 occurrence was removed. */
34165 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34167 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34171 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34172 with rounding. */
34173 static rtx
34174 ix86_expand_sse_comi_round (const struct builtin_description *d,
34175 tree exp, rtx target)
34177 rtx pat, set_dst;
34178 tree arg0 = CALL_EXPR_ARG (exp, 0);
34179 tree arg1 = CALL_EXPR_ARG (exp, 1);
34180 tree arg2 = CALL_EXPR_ARG (exp, 2);
34181 tree arg3 = CALL_EXPR_ARG (exp, 3);
34182 rtx op0 = expand_normal (arg0);
34183 rtx op1 = expand_normal (arg1);
34184 rtx op2 = expand_normal (arg2);
34185 rtx op3 = expand_normal (arg3);
34186 enum insn_code icode = d->icode;
34187 const struct insn_data_d *insn_p = &insn_data[icode];
34188 enum machine_mode mode0 = insn_p->operand[0].mode;
34189 enum machine_mode mode1 = insn_p->operand[1].mode;
34190 enum rtx_code comparison = UNEQ;
34191 bool need_ucomi = false;
34193 /* See avxintrin.h for values. */
34194 enum rtx_code comi_comparisons[32] =
34196 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34197 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34198 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34200 bool need_ucomi_values[32] =
34202 true, false, false, true, true, false, false, true,
34203 true, false, false, true, true, false, false, true,
34204 false, true, true, false, false, true, true, false,
34205 false, true, true, false, false, true, true, false
34208 if (!CONST_INT_P (op2))
34210 error ("the third argument must be comparison constant");
34211 return const0_rtx;
34213 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34215 error ("incorect comparison mode");
34216 return const0_rtx;
34219 if (!insn_p->operand[2].predicate (op3, SImode))
34221 error ("incorrect rounding operand");
34222 return const0_rtx;
34225 comparison = comi_comparisons[INTVAL (op2)];
34226 need_ucomi = need_ucomi_values[INTVAL (op2)];
34228 if (VECTOR_MODE_P (mode0))
34229 op0 = safe_vector_operand (op0, mode0);
34230 if (VECTOR_MODE_P (mode1))
34231 op1 = safe_vector_operand (op1, mode1);
34233 target = gen_reg_rtx (SImode);
34234 emit_move_insn (target, const0_rtx);
34235 target = gen_rtx_SUBREG (QImode, target, 0);
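/* Editor's note: the SImode result register is zeroed and only its low
   QImode part is written via STRICT_LOW_PART below, so the function can
   return the full SImode value at the end.  */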
34237 if ((optimize && !register_operand (op0, mode0))
34238 || !insn_p->operand[0].predicate (op0, mode0))
34239 op0 = copy_to_mode_reg (mode0, op0);
34240 if ((optimize && !register_operand (op1, mode1))
34241 || !insn_p->operand[1].predicate (op1, mode1))
34242 op1 = copy_to_mode_reg (mode1, op1);
34244 if (need_ucomi)
34245 icode = icode == CODE_FOR_sse_comi_round
34246 ? CODE_FOR_sse_ucomi_round
34247 : CODE_FOR_sse2_ucomi_round;
34249 pat = GEN_FCN (icode) (op0, op1, op3);
34250 if (! pat)
34251 return 0;
34253 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34254 if (INTVAL (op3) == NO_ROUND)
34256 pat = ix86_erase_embedded_rounding (pat);
34257 if (! pat)
34258 return 0;
34260 set_dst = SET_DEST (pat);
34262 else
34264 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34265 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34268 emit_insn (pat);
34269 emit_insn (gen_rtx_SET (VOIDmode,
34270 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34271 gen_rtx_fmt_ee (comparison, QImode,
34272 set_dst,
34273 const0_rtx)));
34275 return SUBREG_REG (target);
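/* Expand AVX-512 builtins that take an embedded-rounding (or SAE) operand
   as their last argument (editor's summary; this function otherwise
   mirrors ix86_expand_args_builtin above).  */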
34278 static rtx
34279 ix86_expand_round_builtin (const struct builtin_description *d,
34280 tree exp, rtx target)
34282 rtx pat;
34283 unsigned int i, nargs;
34284 struct
34286 rtx op;
34287 enum machine_mode mode;
34288 } args[6];
34289 enum insn_code icode = d->icode;
34290 const struct insn_data_d *insn_p = &insn_data[icode];
34291 enum machine_mode tmode = insn_p->operand[0].mode;
34292 unsigned int nargs_constant = 0;
34293 unsigned int redundant_embed_rnd = 0;
34295 switch ((enum ix86_builtin_func_type) d->flag)
34297 case UINT64_FTYPE_V2DF_INT:
34298 case UINT64_FTYPE_V4SF_INT:
34299 case UINT_FTYPE_V2DF_INT:
34300 case UINT_FTYPE_V4SF_INT:
34301 case INT64_FTYPE_V2DF_INT:
34302 case INT64_FTYPE_V4SF_INT:
34303 case INT_FTYPE_V2DF_INT:
34304 case INT_FTYPE_V4SF_INT:
34305 nargs = 2;
34306 break;
34307 case V4SF_FTYPE_V4SF_UINT_INT:
34308 case V4SF_FTYPE_V4SF_UINT64_INT:
34309 case V2DF_FTYPE_V2DF_UINT64_INT:
34310 case V4SF_FTYPE_V4SF_INT_INT:
34311 case V4SF_FTYPE_V4SF_INT64_INT:
34312 case V2DF_FTYPE_V2DF_INT64_INT:
34313 case V4SF_FTYPE_V4SF_V4SF_INT:
34314 case V2DF_FTYPE_V2DF_V2DF_INT:
34315 case V4SF_FTYPE_V4SF_V2DF_INT:
34316 case V2DF_FTYPE_V2DF_V4SF_INT:
34317 nargs = 3;
34318 break;
34319 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34320 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34321 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34322 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34323 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34324 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34325 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34326 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34327 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34328 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34329 nargs = 4;
34330 break;
34331 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34332 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34333 nargs_constant = 2;
34334 nargs = 4;
34335 break;
34336 case INT_FTYPE_V4SF_V4SF_INT_INT:
34337 case INT_FTYPE_V2DF_V2DF_INT_INT:
34338 return ix86_expand_sse_comi_round (d, exp, target);
34339 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34340 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34341 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34342 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34343 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34344 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34345 nargs = 5;
34346 break;
34347 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34348 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34349 nargs_constant = 4;
34350 nargs = 5;
34351 break;
34352 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34353 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34354 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34355 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34356 nargs_constant = 3;
34357 nargs = 5;
34358 break;
34359 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34360 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34361 nargs = 6;
34362 nargs_constant = 4;
34363 break;
34364 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34365 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34366 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34367 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34368 nargs = 6;
34369 nargs_constant = 3;
34370 break;
34371 default:
34372 gcc_unreachable ();
34374 gcc_assert (nargs <= ARRAY_SIZE (args));
34376 if (optimize
34377 || target == 0
34378 || GET_MODE (target) != tmode
34379 || !insn_p->operand[0].predicate (target, tmode))
34380 target = gen_reg_rtx (tmode);
34382 for (i = 0; i < nargs; i++)
34384 tree arg = CALL_EXPR_ARG (exp, i);
34385 rtx op = expand_normal (arg);
34386 enum machine_mode mode = insn_p->operand[i + 1].mode;
34387 bool match = insn_p->operand[i + 1].predicate (op, mode);
34389 if (i == nargs - nargs_constant)
34391 if (!match)
34393 switch (icode)
34395 case CODE_FOR_avx512f_getmantv8df_mask_round:
34396 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34397 case CODE_FOR_avx512f_getmantv2df_round:
34398 case CODE_FOR_avx512f_getmantv4sf_round:
34399 error ("the immediate argument must be a 4-bit immediate");
34400 return const0_rtx;
34401 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34402 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34403 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34404 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34405 error ("the immediate argument must be a 5-bit immediate");
34406 return const0_rtx;
34407 default:
34408 error ("the immediate argument must be an 8-bit immediate");
34409 return const0_rtx;
34413 else if (i == nargs-1)
34415 if (!insn_p->operand[nargs].predicate (op, SImode))
34417 error ("incorrect rounding operand");
34418 return const0_rtx;
34421 /* If there is no rounding, use the normal version of the pattern. */
34422 if (INTVAL (op) == NO_ROUND)
34423 redundant_embed_rnd = 1;
34425 else
34427 if (VECTOR_MODE_P (mode))
34428 op = safe_vector_operand (op, mode);
34430 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34432 if (optimize || !match)
34433 op = copy_to_mode_reg (mode, op);
34435 else
34437 op = copy_to_reg (op);
34438 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34442 args[i].op = op;
34443 args[i].mode = mode;
34446 switch (nargs)
34448 case 1:
34449 pat = GEN_FCN (icode) (target, args[0].op);
34450 break;
34451 case 2:
34452 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34453 break;
34454 case 3:
34455 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34456 args[2].op);
34457 break;
34458 case 4:
34459 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34460 args[2].op, args[3].op);
34461 break;
34462 case 5:
34463 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34464 args[2].op, args[3].op, args[4].op);
break;
34465 case 6:
34466 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34467 args[2].op, args[3].op, args[4].op,
34468 args[5].op);
34469 break;
34470 default:
34471 gcc_unreachable ();
34474 if (!pat)
34475 return 0;
34477 if (redundant_embed_rnd)
34478 pat = ix86_erase_embedded_rounding (pat);
34480 emit_insn (pat);
34481 return target;
34484 /* Subroutine of ix86_expand_builtin to take care of special insns
34485 with variable number of operands. */
34487 static rtx
34488 ix86_expand_special_args_builtin (const struct builtin_description *d,
34489 tree exp, rtx target)
34491 tree arg;
34492 rtx pat, op;
34493 unsigned int i, nargs, arg_adjust, memory;
34494 bool aligned_mem = false;
34495 struct
34497 rtx op;
34498 enum machine_mode mode;
34499 } args[3];
34500 enum insn_code icode = d->icode;
34501 bool last_arg_constant = false;
34502 const struct insn_data_d *insn_p = &insn_data[icode];
34503 enum machine_mode tmode = insn_p->operand[0].mode;
34504 enum { load, store } klass;
34506 switch ((enum ix86_builtin_func_type) d->flag)
34508 case VOID_FTYPE_VOID:
34509 emit_insn (GEN_FCN (icode) (target));
34510 return 0;
34511 case VOID_FTYPE_UINT64:
34512 case VOID_FTYPE_UNSIGNED:
34513 nargs = 0;
34514 klass = store;
34515 memory = 0;
34516 break;
34518 case INT_FTYPE_VOID:
34519 case UINT64_FTYPE_VOID:
34520 case UNSIGNED_FTYPE_VOID:
34521 nargs = 0;
34522 klass = load;
34523 memory = 0;
34524 break;
34525 case UINT64_FTYPE_PUNSIGNED:
34526 case V2DI_FTYPE_PV2DI:
34527 case V4DI_FTYPE_PV4DI:
34528 case V32QI_FTYPE_PCCHAR:
34529 case V16QI_FTYPE_PCCHAR:
34530 case V8SF_FTYPE_PCV4SF:
34531 case V8SF_FTYPE_PCFLOAT:
34532 case V4SF_FTYPE_PCFLOAT:
34533 case V4DF_FTYPE_PCV2DF:
34534 case V4DF_FTYPE_PCDOUBLE:
34535 case V2DF_FTYPE_PCDOUBLE:
34536 case VOID_FTYPE_PVOID:
34537 case V16SI_FTYPE_PV4SI:
34538 case V16SF_FTYPE_PV4SF:
34539 case V8DI_FTYPE_PV4DI:
34540 case V8DI_FTYPE_PV8DI:
34541 case V8DF_FTYPE_PV4DF:
34542 nargs = 1;
34543 klass = load;
34544 memory = 0;
34545 switch (icode)
34547 case CODE_FOR_sse4_1_movntdqa:
34548 case CODE_FOR_avx2_movntdqa:
34549 case CODE_FOR_avx512f_movntdqa:
34550 aligned_mem = true;
34551 break;
34552 default:
34553 break;
34555 break;
34556 case VOID_FTYPE_PV2SF_V4SF:
34557 case VOID_FTYPE_PV8DI_V8DI:
34558 case VOID_FTYPE_PV4DI_V4DI:
34559 case VOID_FTYPE_PV2DI_V2DI:
34560 case VOID_FTYPE_PCHAR_V32QI:
34561 case VOID_FTYPE_PCHAR_V16QI:
34562 case VOID_FTYPE_PFLOAT_V16SF:
34563 case VOID_FTYPE_PFLOAT_V8SF:
34564 case VOID_FTYPE_PFLOAT_V4SF:
34565 case VOID_FTYPE_PDOUBLE_V8DF:
34566 case VOID_FTYPE_PDOUBLE_V4DF:
34567 case VOID_FTYPE_PDOUBLE_V2DF:
34568 case VOID_FTYPE_PLONGLONG_LONGLONG:
34569 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34570 case VOID_FTYPE_PINT_INT:
34571 nargs = 1;
34572 klass = store;
34573 /* Reserve memory operand for target. */
34574 memory = ARRAY_SIZE (args);
34575 switch (icode)
34577 /* These builtins and instructions require the memory
34578 to be properly aligned. */
34579 case CODE_FOR_avx_movntv4di:
34580 case CODE_FOR_sse2_movntv2di:
34581 case CODE_FOR_avx_movntv8sf:
34582 case CODE_FOR_sse_movntv4sf:
34583 case CODE_FOR_sse4a_vmmovntv4sf:
34584 case CODE_FOR_avx_movntv4df:
34585 case CODE_FOR_sse2_movntv2df:
34586 case CODE_FOR_sse4a_vmmovntv2df:
34587 case CODE_FOR_sse2_movntidi:
34588 case CODE_FOR_sse_movntq:
34589 case CODE_FOR_sse2_movntisi:
34590 case CODE_FOR_avx512f_movntv16sf:
34591 case CODE_FOR_avx512f_movntv8df:
34592 case CODE_FOR_avx512f_movntv8di:
34593 aligned_mem = true;
34594 break;
34595 default:
34596 break;
34598 break;
34599 case V4SF_FTYPE_V4SF_PCV2SF:
34600 case V2DF_FTYPE_V2DF_PCDOUBLE:
34601 nargs = 2;
34602 klass = load;
34603 memory = 1;
34604 break;
34605 case V8SF_FTYPE_PCV8SF_V8SI:
34606 case V4DF_FTYPE_PCV4DF_V4DI:
34607 case V4SF_FTYPE_PCV4SF_V4SI:
34608 case V2DF_FTYPE_PCV2DF_V2DI:
34609 case V8SI_FTYPE_PCV8SI_V8SI:
34610 case V4DI_FTYPE_PCV4DI_V4DI:
34611 case V4SI_FTYPE_PCV4SI_V4SI:
34612 case V2DI_FTYPE_PCV2DI_V2DI:
34613 nargs = 2;
34614 klass = load;
34615 memory = 0;
34616 break;
34617 case VOID_FTYPE_PV8DF_V8DF_QI:
34618 case VOID_FTYPE_PV16SF_V16SF_HI:
34619 case VOID_FTYPE_PV8DI_V8DI_QI:
34620 case VOID_FTYPE_PV16SI_V16SI_HI:
34621 switch (icode)
34623 /* These builtins and instructions require the memory
34624 to be properly aligned. */
34625 case CODE_FOR_avx512f_storev16sf_mask:
34626 case CODE_FOR_avx512f_storev16si_mask:
34627 case CODE_FOR_avx512f_storev8df_mask:
34628 case CODE_FOR_avx512f_storev8di_mask:
34629 aligned_mem = true;
34630 break;
34631 default:
34632 break;
34634 /* FALLTHRU */
34635 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34636 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34637 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34638 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34639 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34640 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34641 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34642 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34643 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34644 case VOID_FTYPE_PFLOAT_V4SF_QI:
34645 case VOID_FTYPE_PV8SI_V8DI_QI:
34646 case VOID_FTYPE_PV8HI_V8DI_QI:
34647 case VOID_FTYPE_PV16HI_V16SI_HI:
34648 case VOID_FTYPE_PV16QI_V8DI_QI:
34649 case VOID_FTYPE_PV16QI_V16SI_HI:
34650 nargs = 2;
34651 klass = store;
34652 /* Reserve memory operand for target. */
34653 memory = ARRAY_SIZE (args);
34654 break;
34655 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34656 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34657 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34658 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34659 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34660 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34661 nargs = 3;
34662 klass = load;
34663 memory = 0;
34664 switch (icode)
34666 /* These builtins and instructions require the memory
34667 to be properly aligned. */
34668 case CODE_FOR_avx512f_loadv16sf_mask:
34669 case CODE_FOR_avx512f_loadv16si_mask:
34670 case CODE_FOR_avx512f_loadv8df_mask:
34671 case CODE_FOR_avx512f_loadv8di_mask:
34672 aligned_mem = true;
34673 break;
34674 default:
34675 break;
34677 break;
34678 case VOID_FTYPE_UINT_UINT_UINT:
34679 case VOID_FTYPE_UINT64_UINT_UINT:
34680 case UCHAR_FTYPE_UINT_UINT_UINT:
34681 case UCHAR_FTYPE_UINT64_UINT_UINT:
34682 nargs = 3;
34683 klass = load;
34684 memory = ARRAY_SIZE (args);
34685 last_arg_constant = true;
34686 break;
34687 default:
34688 gcc_unreachable ();
34691 gcc_assert (nargs <= ARRAY_SIZE (args));
34693 if (klass == store)
34695 arg = CALL_EXPR_ARG (exp, 0);
34696 op = expand_normal (arg);
34697 gcc_assert (target == 0);
34698 if (memory)
34700 op = ix86_zero_extend_to_Pmode (op);
34701 target = gen_rtx_MEM (tmode, op);
34702 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34703 on it. Try to improve it using get_pointer_alignment,
34704 and if the special builtin is one that requires strict
34705 mode alignment, also from its GET_MODE_ALIGNMENT.
34706 Failure to do so could lead to ix86_legitimate_combined_insn
34707 rejecting all changes to such insns. */
34708 unsigned int align = get_pointer_alignment (arg);
34709 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34710 align = GET_MODE_ALIGNMENT (tmode);
34711 if (MEM_ALIGN (target) < align)
34712 set_mem_align (target, align);
34714 else
34715 target = force_reg (tmode, op);
34716 arg_adjust = 1;
34718 else
34720 arg_adjust = 0;
34721 if (optimize
34722 || target == 0
34723 || !register_operand (target, tmode)
34724 || GET_MODE (target) != tmode)
34725 target = gen_reg_rtx (tmode);
34728 for (i = 0; i < nargs; i++)
34730 enum machine_mode mode = insn_p->operand[i + 1].mode;
34731 bool match;
34733 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34734 op = expand_normal (arg);
34735 match = insn_p->operand[i + 1].predicate (op, mode);
34737 if (last_arg_constant && (i + 1) == nargs)
34739 if (!match)
34741 if (icode == CODE_FOR_lwp_lwpvalsi3
34742 || icode == CODE_FOR_lwp_lwpinssi3
34743 || icode == CODE_FOR_lwp_lwpvaldi3
34744 || icode == CODE_FOR_lwp_lwpinsdi3)
34745 error ("the last argument must be a 32-bit immediate");
34746 else
34747 error ("the last argument must be an 8-bit immediate");
34748 return const0_rtx;
34751 else
34753 if (i == memory)
34755 /* This must be the memory operand. */
34756 op = ix86_zero_extend_to_Pmode (op);
34757 op = gen_rtx_MEM (mode, op);
34758 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34759 on it. Try to improve it using get_pointer_alignment,
34760 and if the special builtin is one that requires strict
34761 mode alignment, also from its GET_MODE_ALIGNMENT.
34762 Failure to do so could lead to ix86_legitimate_combined_insn
34763 rejecting all changes to such insns. */
34764 unsigned int align = get_pointer_alignment (arg);
34765 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34766 align = GET_MODE_ALIGNMENT (mode);
34767 if (MEM_ALIGN (op) < align)
34768 set_mem_align (op, align);
34770 else
34772 /* This must be register. */
34773 if (VECTOR_MODE_P (mode))
34774 op = safe_vector_operand (op, mode);
34776 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34777 op = copy_to_mode_reg (mode, op);
34778 else
34780 op = copy_to_reg (op);
34781 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34786 args[i].op = op;
34787 args[i].mode = mode;
34790 switch (nargs)
34792 case 0:
34793 pat = GEN_FCN (icode) (target);
34794 break;
34795 case 1:
34796 pat = GEN_FCN (icode) (target, args[0].op);
34797 break;
34798 case 2:
34799 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34800 break;
34801 case 3:
34802 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34803 break;
34804 default:
34805 gcc_unreachable ();
34808 if (! pat)
34809 return 0;
34810 emit_insn (pat);
34811 return klass == store ? 0 : target;
34814 /* Return the integer constant in ARG. Constrain it to be in the range
34815 of the subparts of VEC_TYPE; issue an error if not. */
34817 static int
34818 get_element_number (tree vec_type, tree arg)
34820 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34822 if (!tree_fits_uhwi_p (arg)
34823 || (elt = tree_to_uhwi (arg), elt > max))
34825 error ("selector must be an integer constant in the range 0..%wi", max);
34826 return 0;
34829 return elt;
34832 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34833 ix86_expand_vector_init. We DO have language-level syntax for this, in
34834 the form of (type){ init-list }. Except that since we can't place emms
34835 instructions from inside the compiler, we can't allow the use of MMX
34836 registers unless the user explicitly asks for it. So we do *not* define
34837 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34838 we have builtins invoked by mmintrin.h that give us license to emit
34839 these sorts of instructions. */
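/* For example (editor's note; names assumed from mmintrin.h), _mm_set_pi32
   is implemented on top of __builtin_ia32_vec_init_v2si, which is expanded
   by the function below.  */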
34841 static rtx
34842 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34844 enum machine_mode tmode = TYPE_MODE (type);
34845 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34846 int i, n_elt = GET_MODE_NUNITS (tmode);
34847 rtvec v = rtvec_alloc (n_elt);
34849 gcc_assert (VECTOR_MODE_P (tmode));
34850 gcc_assert (call_expr_nargs (exp) == n_elt);
34852 for (i = 0; i < n_elt; ++i)
34854 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34855 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34858 if (!target || !register_operand (target, tmode))
34859 target = gen_reg_rtx (tmode);
34861 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34862 return target;
34865 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34866 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34867 had a language-level syntax for referencing vector elements. */
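/* For example (editor's note; the builtin name is assumed),
   __builtin_ia32_vec_ext_v4sf (v, 2) extracts element 2 of a V4SF vector
   through the expander below.  */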
34869 static rtx
34870 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34872 enum machine_mode tmode, mode0;
34873 tree arg0, arg1;
34874 int elt;
34875 rtx op0;
34877 arg0 = CALL_EXPR_ARG (exp, 0);
34878 arg1 = CALL_EXPR_ARG (exp, 1);
34880 op0 = expand_normal (arg0);
34881 elt = get_element_number (TREE_TYPE (arg0), arg1);
34883 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34884 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34885 gcc_assert (VECTOR_MODE_P (mode0));
34887 op0 = force_reg (mode0, op0);
34889 if (optimize || !target || !register_operand (target, tmode))
34890 target = gen_reg_rtx (tmode);
34892 ix86_expand_vector_extract (true, target, op0, elt);
34894 return target;
34897 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34898 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34899 a language-level syntax for referencing vector elements. */
34901 static rtx
34902 ix86_expand_vec_set_builtin (tree exp)
34904 enum machine_mode tmode, mode1;
34905 tree arg0, arg1, arg2;
34906 int elt;
34907 rtx op0, op1, target;
34909 arg0 = CALL_EXPR_ARG (exp, 0);
34910 arg1 = CALL_EXPR_ARG (exp, 1);
34911 arg2 = CALL_EXPR_ARG (exp, 2);
34913 tmode = TYPE_MODE (TREE_TYPE (arg0));
34914 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34915 gcc_assert (VECTOR_MODE_P (tmode));
34917 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34918 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34919 elt = get_element_number (TREE_TYPE (arg0), arg2);
34921 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34922 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34924 op0 = force_reg (tmode, op0);
34925 op1 = force_reg (mode1, op1);
34927 /* OP0 is the source of these builtin functions and shouldn't be
34928 modified. Create a copy, use it and return it as target. */
34929 target = gen_reg_rtx (tmode);
34930 emit_move_insn (target, op0);
34931 ix86_expand_vector_set (true, target, op1, elt);
34933 return target;
34936 /* Expand an expression EXP that calls a built-in function,
34937 with result going to TARGET if that's convenient
34938 (and in mode MODE if that's convenient).
34939 SUBTARGET may be used as the target for computing one of EXP's operands.
34940 IGNORE is nonzero if the value is to be ignored. */
34942 static rtx
34943 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34944 enum machine_mode mode, int ignore)
34946 const struct builtin_description *d;
34947 size_t i;
34948 enum insn_code icode;
34949 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34950 tree arg0, arg1, arg2, arg3, arg4;
34951 rtx op0, op1, op2, op3, op4, pat, insn;
34952 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34953 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34955 /* For CPU builtins that can be folded, fold first and expand the fold. */
34956 switch (fcode)
34958 case IX86_BUILTIN_CPU_INIT:
34960 /* Make it call __cpu_indicator_init in libgcc. */
34961 tree call_expr, fndecl, type;
34962 type = build_function_type_list (integer_type_node, NULL_TREE);
34963 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34964 call_expr = build_call_expr (fndecl, 0);
34965 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34967 case IX86_BUILTIN_CPU_IS:
34968 case IX86_BUILTIN_CPU_SUPPORTS:
34970 tree arg0 = CALL_EXPR_ARG (exp, 0);
34971 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34972 gcc_assert (fold_expr != NULL_TREE);
34973 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34977 /* Determine whether the builtin function is available under the current ISA.
34978 Originally the builtin was not created if it wasn't applicable to the
34979 current ISA based on the command line switches. With function specific
34980 options, we need to check in the context of the function making the call
34981 whether it is supported. */
34982 if (ix86_builtins_isa[fcode].isa
34983 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34985 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34986 NULL, (enum fpmath_unit) 0, false);
34988 if (!opts)
34989 error ("%qE needs unknown isa option", fndecl);
34990 else
34992 gcc_assert (opts != NULL);
34993 error ("%qE needs isa option %s", fndecl, opts);
34994 free (opts);
34996 return const0_rtx;
34999 switch (fcode)
35001 case IX86_BUILTIN_MASKMOVQ:
35002 case IX86_BUILTIN_MASKMOVDQU:
35003 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35004 ? CODE_FOR_mmx_maskmovq
35005 : CODE_FOR_sse2_maskmovdqu);
35006 /* Note the arg order is different from the operand order. */
35007 arg1 = CALL_EXPR_ARG (exp, 0);
35008 arg2 = CALL_EXPR_ARG (exp, 1);
35009 arg0 = CALL_EXPR_ARG (exp, 2);
35010 op0 = expand_normal (arg0);
35011 op1 = expand_normal (arg1);
35012 op2 = expand_normal (arg2);
35013 mode0 = insn_data[icode].operand[0].mode;
35014 mode1 = insn_data[icode].operand[1].mode;
35015 mode2 = insn_data[icode].operand[2].mode;
35017 op0 = ix86_zero_extend_to_Pmode (op0);
35018 op0 = gen_rtx_MEM (mode1, op0);
35020 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35021 op0 = copy_to_mode_reg (mode0, op0);
35022 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35023 op1 = copy_to_mode_reg (mode1, op1);
35024 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35025 op2 = copy_to_mode_reg (mode2, op2);
35026 pat = GEN_FCN (icode) (op0, op1, op2);
35027 if (! pat)
35028 return 0;
35029 emit_insn (pat);
35030 return 0;
35032 case IX86_BUILTIN_LDMXCSR:
35033 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35034 target = assign_386_stack_local (SImode, SLOT_TEMP);
35035 emit_move_insn (target, op0);
35036 emit_insn (gen_sse_ldmxcsr (target));
35037 return 0;
35039 case IX86_BUILTIN_STMXCSR:
35040 target = assign_386_stack_local (SImode, SLOT_TEMP);
35041 emit_insn (gen_sse_stmxcsr (target));
35042 return copy_to_mode_reg (SImode, target);
35044 case IX86_BUILTIN_CLFLUSH:
35045 arg0 = CALL_EXPR_ARG (exp, 0);
35046 op0 = expand_normal (arg0);
35047 icode = CODE_FOR_sse2_clflush;
35048 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35049 op0 = ix86_zero_extend_to_Pmode (op0);
35051 emit_insn (gen_sse2_clflush (op0));
35052 return 0;
35054 case IX86_BUILTIN_MONITOR:
35055 arg0 = CALL_EXPR_ARG (exp, 0);
35056 arg1 = CALL_EXPR_ARG (exp, 1);
35057 arg2 = CALL_EXPR_ARG (exp, 2);
35058 op0 = expand_normal (arg0);
35059 op1 = expand_normal (arg1);
35060 op2 = expand_normal (arg2);
35061 if (!REG_P (op0))
35062 op0 = ix86_zero_extend_to_Pmode (op0);
35063 if (!REG_P (op1))
35064 op1 = copy_to_mode_reg (SImode, op1);
35065 if (!REG_P (op2))
35066 op2 = copy_to_mode_reg (SImode, op2);
35067 emit_insn (ix86_gen_monitor (op0, op1, op2));
35068 return 0;
35070 case IX86_BUILTIN_MWAIT:
35071 arg0 = CALL_EXPR_ARG (exp, 0);
35072 arg1 = CALL_EXPR_ARG (exp, 1);
35073 op0 = expand_normal (arg0);
35074 op1 = expand_normal (arg1);
35075 if (!REG_P (op0))
35076 op0 = copy_to_mode_reg (SImode, op0);
35077 if (!REG_P (op1))
35078 op1 = copy_to_mode_reg (SImode, op1);
35079 emit_insn (gen_sse3_mwait (op0, op1));
35080 return 0;
35082 case IX86_BUILTIN_VEC_INIT_V2SI:
35083 case IX86_BUILTIN_VEC_INIT_V4HI:
35084 case IX86_BUILTIN_VEC_INIT_V8QI:
35085 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35087 case IX86_BUILTIN_VEC_EXT_V2DF:
35088 case IX86_BUILTIN_VEC_EXT_V2DI:
35089 case IX86_BUILTIN_VEC_EXT_V4SF:
35090 case IX86_BUILTIN_VEC_EXT_V4SI:
35091 case IX86_BUILTIN_VEC_EXT_V8HI:
35092 case IX86_BUILTIN_VEC_EXT_V2SI:
35093 case IX86_BUILTIN_VEC_EXT_V4HI:
35094 case IX86_BUILTIN_VEC_EXT_V16QI:
35095 return ix86_expand_vec_ext_builtin (exp, target);
35097 case IX86_BUILTIN_VEC_SET_V2DI:
35098 case IX86_BUILTIN_VEC_SET_V4SF:
35099 case IX86_BUILTIN_VEC_SET_V4SI:
35100 case IX86_BUILTIN_VEC_SET_V8HI:
35101 case IX86_BUILTIN_VEC_SET_V4HI:
35102 case IX86_BUILTIN_VEC_SET_V16QI:
35103 return ix86_expand_vec_set_builtin (exp);
35105 case IX86_BUILTIN_INFQ:
35106 case IX86_BUILTIN_HUGE_VALQ:
35108 REAL_VALUE_TYPE inf;
35109 rtx tmp;
35111 real_inf (&inf);
35112 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35114 tmp = validize_mem (force_const_mem (mode, tmp));
35116 if (target == 0)
35117 target = gen_reg_rtx (mode);
35119 emit_move_insn (target, tmp);
35120 return target;
35123 case IX86_BUILTIN_RDPMC:
35124 case IX86_BUILTIN_RDTSC:
35125 case IX86_BUILTIN_RDTSCP:
35127 op0 = gen_reg_rtx (DImode);
35128 op1 = gen_reg_rtx (DImode);
35130 if (fcode == IX86_BUILTIN_RDPMC)
35132 arg0 = CALL_EXPR_ARG (exp, 0);
35133 op2 = expand_normal (arg0);
35134 if (!register_operand (op2, SImode))
35135 op2 = copy_to_mode_reg (SImode, op2);
35137 insn = (TARGET_64BIT
35138 ? gen_rdpmc_rex64 (op0, op1, op2)
35139 : gen_rdpmc (op0, op2));
35140 emit_insn (insn);
35142 else if (fcode == IX86_BUILTIN_RDTSC)
35144 insn = (TARGET_64BIT
35145 ? gen_rdtsc_rex64 (op0, op1)
35146 : gen_rdtsc (op0));
35147 emit_insn (insn);
35149 else
35151 op2 = gen_reg_rtx (SImode);
35153 insn = (TARGET_64BIT
35154 ? gen_rdtscp_rex64 (op0, op1, op2)
35155 : gen_rdtscp (op0, op2));
35156 emit_insn (insn);
35158 arg0 = CALL_EXPR_ARG (exp, 0);
35159 op4 = expand_normal (arg0);
35160 if (!address_operand (op4, VOIDmode))
35162 op4 = convert_memory_address (Pmode, op4);
35163 op4 = copy_addr_to_reg (op4);
35165 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35168 if (target == 0)
35170 /* mode is VOIDmode if __builtin_rd* has been called
35171 without lhs. */
35172 if (mode == VOIDmode)
35173 return target;
35174 target = gen_reg_rtx (mode);
35177 if (TARGET_64BIT)
35179 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35180 op1, 1, OPTAB_DIRECT);
35181 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35182 op0, 1, OPTAB_DIRECT);
35185 emit_move_insn (target, op0);
35186 return target;
35188 case IX86_BUILTIN_FXSAVE:
35189 case IX86_BUILTIN_FXRSTOR:
35190 case IX86_BUILTIN_FXSAVE64:
35191 case IX86_BUILTIN_FXRSTOR64:
35192 case IX86_BUILTIN_FNSTENV:
35193 case IX86_BUILTIN_FLDENV:
35194 case IX86_BUILTIN_FNSTSW:
35195 mode0 = BLKmode;
35196 switch (fcode)
35198 case IX86_BUILTIN_FXSAVE:
35199 icode = CODE_FOR_fxsave;
35200 break;
35201 case IX86_BUILTIN_FXRSTOR:
35202 icode = CODE_FOR_fxrstor;
35203 break;
35204 case IX86_BUILTIN_FXSAVE64:
35205 icode = CODE_FOR_fxsave64;
35206 break;
35207 case IX86_BUILTIN_FXRSTOR64:
35208 icode = CODE_FOR_fxrstor64;
35209 break;
35210 case IX86_BUILTIN_FNSTENV:
35211 icode = CODE_FOR_fnstenv;
35212 break;
35213 case IX86_BUILTIN_FLDENV:
35214 icode = CODE_FOR_fldenv;
35215 break;
35216 case IX86_BUILTIN_FNSTSW:
35217 icode = CODE_FOR_fnstsw;
35218 mode0 = HImode;
35219 break;
35220 default:
35221 gcc_unreachable ();
35224 arg0 = CALL_EXPR_ARG (exp, 0);
35225 op0 = expand_normal (arg0);
35227 if (!address_operand (op0, VOIDmode))
35229 op0 = convert_memory_address (Pmode, op0);
35230 op0 = copy_addr_to_reg (op0);
35232 op0 = gen_rtx_MEM (mode0, op0);
35234 pat = GEN_FCN (icode) (op0);
35235 if (pat)
35236 emit_insn (pat);
35237 return 0;
35239 case IX86_BUILTIN_XSAVE:
35240 case IX86_BUILTIN_XRSTOR:
35241 case IX86_BUILTIN_XSAVE64:
35242 case IX86_BUILTIN_XRSTOR64:
35243 case IX86_BUILTIN_XSAVEOPT:
35244 case IX86_BUILTIN_XSAVEOPT64:
35245 arg0 = CALL_EXPR_ARG (exp, 0);
35246 arg1 = CALL_EXPR_ARG (exp, 1);
35247 op0 = expand_normal (arg0);
35248 op1 = expand_normal (arg1);
35250 if (!address_operand (op0, VOIDmode))
35252 op0 = convert_memory_address (Pmode, op0);
35253 op0 = copy_addr_to_reg (op0);
35255 op0 = gen_rtx_MEM (BLKmode, op0);
35257 op1 = force_reg (DImode, op1);
35259 if (TARGET_64BIT)
35261 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35262 NULL, 1, OPTAB_DIRECT);
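/* Editor's note: the 64-bit XSAVE feature mask is handed to the pattern as
   two SImode halves, op1 holding the low 32 bits and op2 the high 32 bits
   (the EDX:EAX pair expected by the instruction).  */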
35263 switch (fcode)
35265 case IX86_BUILTIN_XSAVE:
35266 icode = CODE_FOR_xsave_rex64;
35267 break;
35268 case IX86_BUILTIN_XRSTOR:
35269 icode = CODE_FOR_xrstor_rex64;
35270 break;
35271 case IX86_BUILTIN_XSAVE64:
35272 icode = CODE_FOR_xsave64;
35273 break;
35274 case IX86_BUILTIN_XRSTOR64:
35275 icode = CODE_FOR_xrstor64;
35276 break;
35277 case IX86_BUILTIN_XSAVEOPT:
35278 icode = CODE_FOR_xsaveopt_rex64;
35279 break;
35280 case IX86_BUILTIN_XSAVEOPT64:
35281 icode = CODE_FOR_xsaveopt64;
35282 break;
35283 default:
35284 gcc_unreachable ();
35287 op2 = gen_lowpart (SImode, op2);
35288 op1 = gen_lowpart (SImode, op1);
35289 pat = GEN_FCN (icode) (op0, op1, op2);
35291 else
35293 switch (fcode)
35295 case IX86_BUILTIN_XSAVE:
35296 icode = CODE_FOR_xsave;
35297 break;
35298 case IX86_BUILTIN_XRSTOR:
35299 icode = CODE_FOR_xrstor;
35300 break;
35301 case IX86_BUILTIN_XSAVEOPT:
35302 icode = CODE_FOR_xsaveopt;
35303 break;
35304 default:
35305 gcc_unreachable ();
35307 pat = GEN_FCN (icode) (op0, op1);
35310 if (pat)
35311 emit_insn (pat);
35312 return 0;
35314 case IX86_BUILTIN_LLWPCB:
35315 arg0 = CALL_EXPR_ARG (exp, 0);
35316 op0 = expand_normal (arg0);
35317 icode = CODE_FOR_lwp_llwpcb;
35318 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35319 op0 = ix86_zero_extend_to_Pmode (op0);
35320 emit_insn (gen_lwp_llwpcb (op0));
35321 return 0;
35323 case IX86_BUILTIN_SLWPCB:
35324 icode = CODE_FOR_lwp_slwpcb;
35325 if (!target
35326 || !insn_data[icode].operand[0].predicate (target, Pmode))
35327 target = gen_reg_rtx (Pmode);
35328 emit_insn (gen_lwp_slwpcb (target));
35329 return target;
35331 case IX86_BUILTIN_BEXTRI32:
35332 case IX86_BUILTIN_BEXTRI64:
35333 arg0 = CALL_EXPR_ARG (exp, 0);
35334 arg1 = CALL_EXPR_ARG (exp, 1);
35335 op0 = expand_normal (arg0);
35336 op1 = expand_normal (arg1);
35337 icode = (fcode == IX86_BUILTIN_BEXTRI32
35338 ? CODE_FOR_tbm_bextri_si
35339 : CODE_FOR_tbm_bextri_di);
35340 if (!CONST_INT_P (op1))
35342 error ("last argument must be an immediate");
35343 return const0_rtx;
35345 else
35347 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35348 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35349 op1 = GEN_INT (length);
35350 op2 = GEN_INT (lsb_index);
35351 pat = GEN_FCN (icode) (target, op0, op1, op2);
35352 if (pat)
35353 emit_insn (pat);
35354 return target;
35357 case IX86_BUILTIN_RDRAND16_STEP:
35358 icode = CODE_FOR_rdrandhi_1;
35359 mode0 = HImode;
35360 goto rdrand_step;
35362 case IX86_BUILTIN_RDRAND32_STEP:
35363 icode = CODE_FOR_rdrandsi_1;
35364 mode0 = SImode;
35365 goto rdrand_step;
35367 case IX86_BUILTIN_RDRAND64_STEP:
35368 icode = CODE_FOR_rdranddi_1;
35369 mode0 = DImode;
35371 rdrand_step:
35372 op0 = gen_reg_rtx (mode0);
35373 emit_insn (GEN_FCN (icode) (op0));
35375 arg0 = CALL_EXPR_ARG (exp, 0);
35376 op1 = expand_normal (arg0);
35377 if (!address_operand (op1, VOIDmode))
35379 op1 = convert_memory_address (Pmode, op1);
35380 op1 = copy_addr_to_reg (op1);
35382 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35384 op1 = gen_reg_rtx (SImode);
35385 emit_move_insn (op1, CONST1_RTX (SImode));
35387 /* Emit SImode conditional move. */
35388 if (mode0 == HImode)
35390 op2 = gen_reg_rtx (SImode);
35391 emit_insn (gen_zero_extendhisi2 (op2, op0));
35393 else if (mode0 == SImode)
35394 op2 = op0;
35395 else
35396 op2 = gen_rtx_SUBREG (SImode, op0, 0);
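/* Editor's note: RDRAND sets CF on success and zeroes its destination on
   failure, so the conditional move below returns op1 (the constant 1) when
   CF is set and the zeroed value otherwise, yielding the 0/1 result of the
   *_step builtin.  */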
35398 if (target == 0)
35399 target = gen_reg_rtx (SImode);
35401 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35402 const0_rtx);
35403 emit_insn (gen_rtx_SET (VOIDmode, target,
35404 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35405 return target;
35407 case IX86_BUILTIN_RDSEED16_STEP:
35408 icode = CODE_FOR_rdseedhi_1;
35409 mode0 = HImode;
35410 goto rdseed_step;
35412 case IX86_BUILTIN_RDSEED32_STEP:
35413 icode = CODE_FOR_rdseedsi_1;
35414 mode0 = SImode;
35415 goto rdseed_step;
35417 case IX86_BUILTIN_RDSEED64_STEP:
35418 icode = CODE_FOR_rdseeddi_1;
35419 mode0 = DImode;
35421 rdseed_step:
35422 op0 = gen_reg_rtx (mode0);
35423 emit_insn (GEN_FCN (icode) (op0));
35425 arg0 = CALL_EXPR_ARG (exp, 0);
35426 op1 = expand_normal (arg0);
35427 if (!address_operand (op1, VOIDmode))
35429 op1 = convert_memory_address (Pmode, op1);
35430 op1 = copy_addr_to_reg (op1);
35432 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
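/* Editor's note: RDSEED leaves its success indication in CF; the LTU test
   on the carry flag below materializes CF into a QImode register, which is
   then zero-extended to form the int return value.  */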
35434 op2 = gen_reg_rtx (QImode);
35436 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35437 const0_rtx);
35438 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35440 if (target == 0)
35441 target = gen_reg_rtx (SImode);
35443 emit_insn (gen_zero_extendqisi2 (target, op2));
35444 return target;
35446 case IX86_BUILTIN_ADDCARRYX32:
35447 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35448 mode0 = SImode;
35449 goto addcarryx;
35451 case IX86_BUILTIN_ADDCARRYX64:
35452 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35453 mode0 = DImode;
35455 addcarryx:
35456 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35457 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35458 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35459 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35461 op0 = gen_reg_rtx (QImode);
35463 /* Generate CF from input operand. */
35464 op1 = expand_normal (arg0);
35465 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35466 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
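/* Editor's note: adding -1 (0xff) to the QImode carry-in overflows exactly
   when the carry-in is nonzero, so this sets CF to (c_in != 0) for the
   add-with-carry below.  */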
35468 /* Generate an ADCX (or plain add-with-carry) instruction to compute X+Y+CF. */
35469 op2 = expand_normal (arg1);
35470 op3 = expand_normal (arg2);
35472 if (!REG_P (op2))
35473 op2 = copy_to_mode_reg (mode0, op2);
35474 if (!REG_P (op3))
35475 op3 = copy_to_mode_reg (mode0, op3);
35477 op0 = gen_reg_rtx (mode0);
35479 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35480 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35481 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35483 /* Store the result. */
35484 op4 = expand_normal (arg3);
35485 if (!address_operand (op4, VOIDmode))
35487 op4 = convert_memory_address (Pmode, op4);
35488 op4 = copy_addr_to_reg (op4);
35490 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35492 /* Return current CF value. */
35493 if (target == 0)
35494 target = gen_reg_rtx (QImode);
35496 PUT_MODE (pat, QImode);
35497 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35498 return target;
35500 case IX86_BUILTIN_READ_FLAGS:
35501 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35503 if (optimize
35504 || target == NULL_RTX
35505 || !nonimmediate_operand (target, word_mode)
35506 || GET_MODE (target) != word_mode)
35507 target = gen_reg_rtx (word_mode);
35509 emit_insn (gen_pop (target));
35510 return target;
35512 case IX86_BUILTIN_WRITE_FLAGS:
35514 arg0 = CALL_EXPR_ARG (exp, 0);
35515 op0 = expand_normal (arg0);
35516 if (!general_no_elim_operand (op0, word_mode))
35517 op0 = copy_to_mode_reg (word_mode, op0);
35519 emit_insn (gen_push (op0));
35520 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35521 return 0;
35523 case IX86_BUILTIN_KORTESTC16:
35524 icode = CODE_FOR_kortestchi;
35525 mode0 = HImode;
35526 mode1 = CCCmode;
35527 goto kortest;
35529 case IX86_BUILTIN_KORTESTZ16:
35530 icode = CODE_FOR_kortestzhi;
35531 mode0 = HImode;
35532 mode1 = CCZmode;
35534 kortest:
35535 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35536 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35537 op0 = expand_normal (arg0);
35538 op1 = expand_normal (arg1);
35540 op0 = copy_to_reg (op0);
35541 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35542 op1 = copy_to_reg (op1);
35543 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
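/* Editor's note: KORTEST sets ZF when the OR of the two masks is all zeros
   and CF when it is all ones; the setcc below reads back whichever flag the
   chosen CCZmode/CCCmode variant tests.  */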
35545 target = gen_reg_rtx (QImode);
35546 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35548 /* Emit kortest. */
35549 emit_insn (GEN_FCN (icode) (op0, op1));
35550 /* And use setcc to return result from flags. */
35551 ix86_expand_setcc (target, EQ,
35552 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35553 return target;
35555 case IX86_BUILTIN_GATHERSIV2DF:
35556 icode = CODE_FOR_avx2_gathersiv2df;
35557 goto gather_gen;
35558 case IX86_BUILTIN_GATHERSIV4DF:
35559 icode = CODE_FOR_avx2_gathersiv4df;
35560 goto gather_gen;
35561 case IX86_BUILTIN_GATHERDIV2DF:
35562 icode = CODE_FOR_avx2_gatherdiv2df;
35563 goto gather_gen;
35564 case IX86_BUILTIN_GATHERDIV4DF:
35565 icode = CODE_FOR_avx2_gatherdiv4df;
35566 goto gather_gen;
35567 case IX86_BUILTIN_GATHERSIV4SF:
35568 icode = CODE_FOR_avx2_gathersiv4sf;
35569 goto gather_gen;
35570 case IX86_BUILTIN_GATHERSIV8SF:
35571 icode = CODE_FOR_avx2_gathersiv8sf;
35572 goto gather_gen;
35573 case IX86_BUILTIN_GATHERDIV4SF:
35574 icode = CODE_FOR_avx2_gatherdiv4sf;
35575 goto gather_gen;
35576 case IX86_BUILTIN_GATHERDIV8SF:
35577 icode = CODE_FOR_avx2_gatherdiv8sf;
35578 goto gather_gen;
35579 case IX86_BUILTIN_GATHERSIV2DI:
35580 icode = CODE_FOR_avx2_gathersiv2di;
35581 goto gather_gen;
35582 case IX86_BUILTIN_GATHERSIV4DI:
35583 icode = CODE_FOR_avx2_gathersiv4di;
35584 goto gather_gen;
35585 case IX86_BUILTIN_GATHERDIV2DI:
35586 icode = CODE_FOR_avx2_gatherdiv2di;
35587 goto gather_gen;
35588 case IX86_BUILTIN_GATHERDIV4DI:
35589 icode = CODE_FOR_avx2_gatherdiv4di;
35590 goto gather_gen;
35591 case IX86_BUILTIN_GATHERSIV4SI:
35592 icode = CODE_FOR_avx2_gathersiv4si;
35593 goto gather_gen;
35594 case IX86_BUILTIN_GATHERSIV8SI:
35595 icode = CODE_FOR_avx2_gathersiv8si;
35596 goto gather_gen;
35597 case IX86_BUILTIN_GATHERDIV4SI:
35598 icode = CODE_FOR_avx2_gatherdiv4si;
35599 goto gather_gen;
35600 case IX86_BUILTIN_GATHERDIV8SI:
35601 icode = CODE_FOR_avx2_gatherdiv8si;
35602 goto gather_gen;
35603 case IX86_BUILTIN_GATHERALTSIV4DF:
35604 icode = CODE_FOR_avx2_gathersiv4df;
35605 goto gather_gen;
35606 case IX86_BUILTIN_GATHERALTDIV8SF:
35607 icode = CODE_FOR_avx2_gatherdiv8sf;
35608 goto gather_gen;
35609 case IX86_BUILTIN_GATHERALTSIV4DI:
35610 icode = CODE_FOR_avx2_gathersiv4di;
35611 goto gather_gen;
35612 case IX86_BUILTIN_GATHERALTDIV8SI:
35613 icode = CODE_FOR_avx2_gatherdiv8si;
35614 goto gather_gen;
35615 case IX86_BUILTIN_GATHER3SIV16SF:
35616 icode = CODE_FOR_avx512f_gathersiv16sf;
35617 goto gather_gen;
35618 case IX86_BUILTIN_GATHER3SIV8DF:
35619 icode = CODE_FOR_avx512f_gathersiv8df;
35620 goto gather_gen;
35621 case IX86_BUILTIN_GATHER3DIV16SF:
35622 icode = CODE_FOR_avx512f_gatherdiv16sf;
35623 goto gather_gen;
35624 case IX86_BUILTIN_GATHER3DIV8DF:
35625 icode = CODE_FOR_avx512f_gatherdiv8df;
35626 goto gather_gen;
35627 case IX86_BUILTIN_GATHER3SIV16SI:
35628 icode = CODE_FOR_avx512f_gathersiv16si;
35629 goto gather_gen;
35630 case IX86_BUILTIN_GATHER3SIV8DI:
35631 icode = CODE_FOR_avx512f_gathersiv8di;
35632 goto gather_gen;
35633 case IX86_BUILTIN_GATHER3DIV16SI:
35634 icode = CODE_FOR_avx512f_gatherdiv16si;
35635 goto gather_gen;
35636 case IX86_BUILTIN_GATHER3DIV8DI:
35637 icode = CODE_FOR_avx512f_gatherdiv8di;
35638 goto gather_gen;
35639 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35640 icode = CODE_FOR_avx512f_gathersiv8df;
35641 goto gather_gen;
35642 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35643 icode = CODE_FOR_avx512f_gatherdiv16sf;
35644 goto gather_gen;
35645 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35646 icode = CODE_FOR_avx512f_gathersiv8di;
35647 goto gather_gen;
35648 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35649 icode = CODE_FOR_avx512f_gatherdiv16si;
35650 goto gather_gen;
35651 case IX86_BUILTIN_SCATTERSIV16SF:
35652 icode = CODE_FOR_avx512f_scattersiv16sf;
35653 goto scatter_gen;
35654 case IX86_BUILTIN_SCATTERSIV8DF:
35655 icode = CODE_FOR_avx512f_scattersiv8df;
35656 goto scatter_gen;
35657 case IX86_BUILTIN_SCATTERDIV16SF:
35658 icode = CODE_FOR_avx512f_scatterdiv16sf;
35659 goto scatter_gen;
35660 case IX86_BUILTIN_SCATTERDIV8DF:
35661 icode = CODE_FOR_avx512f_scatterdiv8df;
35662 goto scatter_gen;
35663 case IX86_BUILTIN_SCATTERSIV16SI:
35664 icode = CODE_FOR_avx512f_scattersiv16si;
35665 goto scatter_gen;
35666 case IX86_BUILTIN_SCATTERSIV8DI:
35667 icode = CODE_FOR_avx512f_scattersiv8di;
35668 goto scatter_gen;
35669 case IX86_BUILTIN_SCATTERDIV16SI:
35670 icode = CODE_FOR_avx512f_scatterdiv16si;
35671 goto scatter_gen;
35672 case IX86_BUILTIN_SCATTERDIV8DI:
35673 icode = CODE_FOR_avx512f_scatterdiv8di;
35674 goto scatter_gen;
35676 case IX86_BUILTIN_GATHERPFDPD:
35677 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35678 goto vec_prefetch_gen;
35679 case IX86_BUILTIN_GATHERPFDPS:
35680 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35681 goto vec_prefetch_gen;
35682 case IX86_BUILTIN_GATHERPFQPD:
35683 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35684 goto vec_prefetch_gen;
35685 case IX86_BUILTIN_GATHERPFQPS:
35686 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35687 goto vec_prefetch_gen;
35688 case IX86_BUILTIN_SCATTERPFDPD:
35689 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35690 goto vec_prefetch_gen;
35691 case IX86_BUILTIN_SCATTERPFDPS:
35692 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35693 goto vec_prefetch_gen;
35694 case IX86_BUILTIN_SCATTERPFQPD:
35695 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35696 goto vec_prefetch_gen;
35697 case IX86_BUILTIN_SCATTERPFQPS:
35698 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35699 goto vec_prefetch_gen;
35701 gather_gen:
35702 rtx half;
35703 rtx (*gen) (rtx, rtx);
35705 arg0 = CALL_EXPR_ARG (exp, 0);
35706 arg1 = CALL_EXPR_ARG (exp, 1);
35707 arg2 = CALL_EXPR_ARG (exp, 2);
35708 arg3 = CALL_EXPR_ARG (exp, 3);
35709 arg4 = CALL_EXPR_ARG (exp, 4);
35710 op0 = expand_normal (arg0);
35711 op1 = expand_normal (arg1);
35712 op2 = expand_normal (arg2);
35713 op3 = expand_normal (arg3);
35714 op4 = expand_normal (arg4);
35715 /* Note the arg order is different from the operand order. */
35716 mode0 = insn_data[icode].operand[1].mode;
35717 mode2 = insn_data[icode].operand[3].mode;
35718 mode3 = insn_data[icode].operand[4].mode;
35719 mode4 = insn_data[icode].operand[5].mode;
35721 if (target == NULL_RTX
35722 || GET_MODE (target) != insn_data[icode].operand[0].mode
35723 || !insn_data[icode].operand[0].predicate (target,
35724 GET_MODE (target)))
35725 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35726 else
35727 subtarget = target;
35729 switch (fcode)
35731 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35732 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35733 half = gen_reg_rtx (V8SImode);
35734 if (!nonimmediate_operand (op2, V16SImode))
35735 op2 = copy_to_mode_reg (V16SImode, op2);
35736 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35737 op2 = half;
35738 break;
35739 case IX86_BUILTIN_GATHERALTSIV4DF:
35740 case IX86_BUILTIN_GATHERALTSIV4DI:
35741 half = gen_reg_rtx (V4SImode);
35742 if (!nonimmediate_operand (op2, V8SImode))
35743 op2 = copy_to_mode_reg (V8SImode, op2);
35744 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35745 op2 = half;
35746 break;
35747 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35748 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35749 half = gen_reg_rtx (mode0);
35750 if (mode0 == V8SFmode)
35751 gen = gen_vec_extract_lo_v16sf;
35752 else
35753 gen = gen_vec_extract_lo_v16si;
35754 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35755 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35756 emit_insn (gen (half, op0));
35757 op0 = half;
35758 if (GET_MODE (op3) != VOIDmode)
35760 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35761 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35762 emit_insn (gen (half, op3));
35763 op3 = half;
35765 break;
35766 case IX86_BUILTIN_GATHERALTDIV8SF:
35767 case IX86_BUILTIN_GATHERALTDIV8SI:
35768 half = gen_reg_rtx (mode0);
35769 if (mode0 == V4SFmode)
35770 gen = gen_vec_extract_lo_v8sf;
35771 else
35772 gen = gen_vec_extract_lo_v8si;
35773 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35774 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35775 emit_insn (gen (half, op0));
35776 op0 = half;
35777 if (GET_MODE (op3) != VOIDmode)
35779 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35780 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35781 emit_insn (gen (half, op3));
35782 op3 = half;
35784 break;
35785 default:
35786 break;
35789 /* Force memory operand only with base register here. But we
35790 don't want to do it on memory operand for other builtin
35791 functions. */
35792 op1 = ix86_zero_extend_to_Pmode (op1);
35794 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35795 op0 = copy_to_mode_reg (mode0, op0);
35796 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35797 op1 = copy_to_mode_reg (Pmode, op1);
35798 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35799 op2 = copy_to_mode_reg (mode2, op2);
35800 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35802 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35803 op3 = copy_to_mode_reg (mode3, op3);
35805 else
35807 op3 = copy_to_reg (op3);
35808 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35810 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35812 error ("the last argument must be scale 1, 2, 4, 8");
35813 return const0_rtx;
35816 /* Optimize. If mask is known to have all high bits set,
35817 replace op0 with pc_rtx to signal that the instruction
35818 overwrites the whole destination and doesn't use its
35819 previous contents. */
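/* For example (editor's illustration), a gather whose mask argument is a
   constant vector with every element's sign bit set has op0 replaced by
   pc_rtx here, since all destination elements will be overwritten.  */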
35820 if (optimize)
35822 if (TREE_CODE (arg3) == INTEGER_CST)
35824 if (integer_all_onesp (arg3))
35825 op0 = pc_rtx;
35827 else if (TREE_CODE (arg3) == VECTOR_CST)
35829 unsigned int negative = 0;
35830 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35832 tree cst = VECTOR_CST_ELT (arg3, i);
35833 if (TREE_CODE (cst) == INTEGER_CST
35834 && tree_int_cst_sign_bit (cst))
35835 negative++;
35836 else if (TREE_CODE (cst) == REAL_CST
35837 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35838 negative++;
35840 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35841 op0 = pc_rtx;
35843 else if (TREE_CODE (arg3) == SSA_NAME
35844 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35846 /* Recognize also when mask is like:
35847 __v2df src = _mm_setzero_pd ();
35848 __v2df mask = _mm_cmpeq_pd (src, src);
35850 __v8sf src = _mm256_setzero_ps ();
35851 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35852 as that is a cheaper way to load all ones into
35853 a register than having to load a constant from
35854 memory. */
35855 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35856 if (is_gimple_call (def_stmt))
35858 tree fndecl = gimple_call_fndecl (def_stmt);
35859 if (fndecl
35860 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35861 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35863 case IX86_BUILTIN_CMPPD:
35864 case IX86_BUILTIN_CMPPS:
35865 case IX86_BUILTIN_CMPPD256:
35866 case IX86_BUILTIN_CMPPS256:
35867 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35868 break;
35869 /* FALLTHRU */
35870 case IX86_BUILTIN_CMPEQPD:
35871 case IX86_BUILTIN_CMPEQPS:
35872 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35873 && initializer_zerop (gimple_call_arg (def_stmt,
35874 1)))
35875 op0 = pc_rtx;
35876 break;
35877 default:
35878 break;
35884 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35885 if (! pat)
35886 return const0_rtx;
35887 emit_insn (pat);
35889 switch (fcode)
35891 case IX86_BUILTIN_GATHER3DIV16SF:
35892 if (target == NULL_RTX)
35893 target = gen_reg_rtx (V8SFmode);
35894 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35895 break;
35896 case IX86_BUILTIN_GATHER3DIV16SI:
35897 if (target == NULL_RTX)
35898 target = gen_reg_rtx (V8SImode);
35899 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35900 break;
35901 case IX86_BUILTIN_GATHERDIV8SF:
35902 if (target == NULL_RTX)
35903 target = gen_reg_rtx (V4SFmode);
35904 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35905 break;
35906 case IX86_BUILTIN_GATHERDIV8SI:
35907 if (target == NULL_RTX)
35908 target = gen_reg_rtx (V4SImode);
35909 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35910 break;
35911 default:
35912 target = subtarget;
35913 break;
35915 return target;
35917 scatter_gen:
35918 arg0 = CALL_EXPR_ARG (exp, 0);
35919 arg1 = CALL_EXPR_ARG (exp, 1);
35920 arg2 = CALL_EXPR_ARG (exp, 2);
35921 arg3 = CALL_EXPR_ARG (exp, 3);
35922 arg4 = CALL_EXPR_ARG (exp, 4);
35923 op0 = expand_normal (arg0);
35924 op1 = expand_normal (arg1);
35925 op2 = expand_normal (arg2);
35926 op3 = expand_normal (arg3);
35927 op4 = expand_normal (arg4);
35928 mode1 = insn_data[icode].operand[1].mode;
35929 mode2 = insn_data[icode].operand[2].mode;
35930 mode3 = insn_data[icode].operand[3].mode;
35931 mode4 = insn_data[icode].operand[4].mode;
35933 /* Force memory operand only with base register here. But we
35934 don't want to do it on memory operand for other builtin
35935 functions. */
35936 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35938 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35939 op0 = copy_to_mode_reg (Pmode, op0);
35941 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35943 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35944 op1 = copy_to_mode_reg (mode1, op1);
35946 else
35948 op1 = copy_to_reg (op1);
35949 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35952 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35953 op2 = copy_to_mode_reg (mode2, op2);
35955 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35956 op3 = copy_to_mode_reg (mode3, op3);
35958 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35960 error ("the last argument must be scale 1, 2, 4, 8");
35961 return const0_rtx;
35964 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35965 if (! pat)
35966 return const0_rtx;
35968 emit_insn (pat);
35969 return 0;
35971 vec_prefetch_gen:
35972 arg0 = CALL_EXPR_ARG (exp, 0);
35973 arg1 = CALL_EXPR_ARG (exp, 1);
35974 arg2 = CALL_EXPR_ARG (exp, 2);
35975 arg3 = CALL_EXPR_ARG (exp, 3);
35976 arg4 = CALL_EXPR_ARG (exp, 4);
35977 op0 = expand_normal (arg0);
35978 op1 = expand_normal (arg1);
35979 op2 = expand_normal (arg2);
35980 op3 = expand_normal (arg3);
35981 op4 = expand_normal (arg4);
35982 mode0 = insn_data[icode].operand[0].mode;
35983 mode1 = insn_data[icode].operand[1].mode;
35984 mode3 = insn_data[icode].operand[3].mode;
35985 mode4 = insn_data[icode].operand[4].mode;
35987 if (GET_MODE (op0) == mode0
35988 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35990 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35991 op0 = copy_to_mode_reg (mode0, op0);
35993 else if (op0 != constm1_rtx)
35995 op0 = copy_to_reg (op0);
35996 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35999 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36000 op1 = copy_to_mode_reg (mode1, op1);
36002 /* Force the memory operand to be addressed with just a base register
36003 here; we don't want to do this for the memory operands of other
36004 builtin functions. */
36005 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36007 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36008 op2 = copy_to_mode_reg (Pmode, op2);
36010 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36012 error ("the forth argument must be scale 1, 2, 4, 8");
36013 return const0_rtx;
36016 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36018 error ("the last argument must be hint 0 or 1");
36019 return const0_rtx;
36022 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36023 if (! pat)
36024 return const0_rtx;
36026 emit_insn (pat);
36028 return 0;
36030 case IX86_BUILTIN_XABORT:
36031 icode = CODE_FOR_xabort;
36032 arg0 = CALL_EXPR_ARG (exp, 0);
36033 op0 = expand_normal (arg0);
36034 mode0 = insn_data[icode].operand[0].mode;
36035 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36037 error ("the xabort's argument must be an 8-bit immediate");
36038 return const0_rtx;
36040 emit_insn (gen_xabort (op0));
36041 return 0;
36043 default:
36044 break;
36047 for (i = 0, d = bdesc_special_args;
36048 i < ARRAY_SIZE (bdesc_special_args);
36049 i++, d++)
36050 if (d->code == fcode)
36051 return ix86_expand_special_args_builtin (d, exp, target);
36053 for (i = 0, d = bdesc_args;
36054 i < ARRAY_SIZE (bdesc_args);
36055 i++, d++)
36056 if (d->code == fcode)
36057 switch (fcode)
36059 case IX86_BUILTIN_FABSQ:
36060 case IX86_BUILTIN_COPYSIGNQ:
36061 if (!TARGET_SSE)
36062 /* Emit a normal call if SSE isn't available. */
36063 return expand_call (exp, target, ignore);
36064 default:
36065 return ix86_expand_args_builtin (d, exp, target);
36068 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36069 if (d->code == fcode)
36070 return ix86_expand_sse_comi (d, exp, target);
36072 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36073 if (d->code == fcode)
36074 return ix86_expand_round_builtin (d, exp, target);
36076 for (i = 0, d = bdesc_pcmpestr;
36077 i < ARRAY_SIZE (bdesc_pcmpestr);
36078 i++, d++)
36079 if (d->code == fcode)
36080 return ix86_expand_sse_pcmpestr (d, exp, target);
36082 for (i = 0, d = bdesc_pcmpistr;
36083 i < ARRAY_SIZE (bdesc_pcmpistr);
36084 i++, d++)
36085 if (d->code == fcode)
36086 return ix86_expand_sse_pcmpistr (d, exp, target);
36088 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36089 if (d->code == fcode)
36090 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36091 (enum ix86_builtin_func_type)
36092 d->flag, d->comparison);
36094 gcc_unreachable ();
36097 /* This returns the target-specific builtin with code CODE if
36098 current_function_decl has visibility on this builtin, which is checked
36099 using isa flags. Returns NULL_TREE otherwise. */
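/* For example, a function compiled with attribute ((target ("avx2")))
   can see the AVX2 gather builtins even when the translation unit is
   built without -mavx2; for other functions this returns NULL_TREE and
   callers fall back to scalar code.  (Illustrative note.)  */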
36101 static tree ix86_get_builtin (enum ix86_builtins code)
36103 struct cl_target_option *opts;
36104 tree target_tree = NULL_TREE;
36106 /* Determine the isa flags of current_function_decl. */
36108 if (current_function_decl)
36109 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36111 if (target_tree == NULL)
36112 target_tree = target_option_default_node;
36114 opts = TREE_TARGET_OPTION (target_tree);
36116 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36117 return ix86_builtin_decl (code, true);
36118 else
36119 return NULL_TREE;
36122 /* Returns a function decl for a vectorized version of the builtin function
36123 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36124 if it is not available. */
36126 static tree
36127 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36128 tree type_in)
36130 enum machine_mode in_mode, out_mode;
36131 int in_n, out_n;
36132 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36134 if (TREE_CODE (type_out) != VECTOR_TYPE
36135 || TREE_CODE (type_in) != VECTOR_TYPE
36136 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36137 return NULL_TREE;
36139 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36140 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36141 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36142 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36144 switch (fn)
36146 case BUILT_IN_SQRT:
36147 if (out_mode == DFmode && in_mode == DFmode)
36149 if (out_n == 2 && in_n == 2)
36150 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36151 else if (out_n == 4 && in_n == 4)
36152 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36153 else if (out_n == 8 && in_n == 8)
36154 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36156 break;
36158 case BUILT_IN_EXP2F:
36159 if (out_mode == SFmode && in_mode == SFmode)
36161 if (out_n == 16 && in_n == 16)
36162 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36164 break;
36166 case BUILT_IN_SQRTF:
36167 if (out_mode == SFmode && in_mode == SFmode)
36169 if (out_n == 4 && in_n == 4)
36170 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36171 else if (out_n == 8 && in_n == 8)
36172 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36173 else if (out_n == 16 && in_n == 16)
36174 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36176 break;
36178 case BUILT_IN_IFLOOR:
36179 case BUILT_IN_LFLOOR:
36180 case BUILT_IN_LLFLOOR:
36181 /* The round insn does not trap on denormals. */
36182 if (flag_trapping_math || !TARGET_ROUND)
36183 break;
36185 if (out_mode == SImode && in_mode == DFmode)
36187 if (out_n == 4 && in_n == 2)
36188 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36189 else if (out_n == 8 && in_n == 4)
36190 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36191 else if (out_n == 16 && in_n == 8)
36192 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36194 break;
36196 case BUILT_IN_IFLOORF:
36197 case BUILT_IN_LFLOORF:
36198 case BUILT_IN_LLFLOORF:
36199 /* The round insn does not trap on denormals. */
36200 if (flag_trapping_math || !TARGET_ROUND)
36201 break;
36203 if (out_mode == SImode && in_mode == SFmode)
36205 if (out_n == 4 && in_n == 4)
36206 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36207 else if (out_n == 8 && in_n == 8)
36208 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36210 break;
36212 case BUILT_IN_ICEIL:
36213 case BUILT_IN_LCEIL:
36214 case BUILT_IN_LLCEIL:
36215 /* The round insn does not trap on denormals. */
36216 if (flag_trapping_math || !TARGET_ROUND)
36217 break;
36219 if (out_mode == SImode && in_mode == DFmode)
36221 if (out_n == 4 && in_n == 2)
36222 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36223 else if (out_n == 8 && in_n == 4)
36224 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36225 else if (out_n == 16 && in_n == 8)
36226 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36228 break;
36230 case BUILT_IN_ICEILF:
36231 case BUILT_IN_LCEILF:
36232 case BUILT_IN_LLCEILF:
36233 /* The round insn does not trap on denormals. */
36234 if (flag_trapping_math || !TARGET_ROUND)
36235 break;
36237 if (out_mode == SImode && in_mode == SFmode)
36239 if (out_n == 4 && in_n == 4)
36240 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36241 else if (out_n == 8 && in_n == 8)
36242 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36244 break;
36246 case BUILT_IN_IRINT:
36247 case BUILT_IN_LRINT:
36248 case BUILT_IN_LLRINT:
36249 if (out_mode == SImode && in_mode == DFmode)
36251 if (out_n == 4 && in_n == 2)
36252 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36253 else if (out_n == 8 && in_n == 4)
36254 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36256 break;
36258 case BUILT_IN_IRINTF:
36259 case BUILT_IN_LRINTF:
36260 case BUILT_IN_LLRINTF:
36261 if (out_mode == SImode && in_mode == SFmode)
36263 if (out_n == 4 && in_n == 4)
36264 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36265 else if (out_n == 8 && in_n == 8)
36266 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36268 break;
36270 case BUILT_IN_IROUND:
36271 case BUILT_IN_LROUND:
36272 case BUILT_IN_LLROUND:
36273 /* The round insn does not trap on denormals. */
36274 if (flag_trapping_math || !TARGET_ROUND)
36275 break;
36277 if (out_mode == SImode && in_mode == DFmode)
36279 if (out_n == 4 && in_n == 2)
36280 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36281 else if (out_n == 8 && in_n == 4)
36282 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36283 else if (out_n == 16 && in_n == 8)
36284 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36286 break;
36288 case BUILT_IN_IROUNDF:
36289 case BUILT_IN_LROUNDF:
36290 case BUILT_IN_LLROUNDF:
36291 /* The round insn does not trap on denormals. */
36292 if (flag_trapping_math || !TARGET_ROUND)
36293 break;
36295 if (out_mode == SImode && in_mode == SFmode)
36297 if (out_n == 4 && in_n == 4)
36298 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36299 else if (out_n == 8 && in_n == 8)
36300 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36302 break;
36304 case BUILT_IN_COPYSIGN:
36305 if (out_mode == DFmode && in_mode == DFmode)
36307 if (out_n == 2 && in_n == 2)
36308 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36309 else if (out_n == 4 && in_n == 4)
36310 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36311 else if (out_n == 8 && in_n == 8)
36312 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36314 break;
36316 case BUILT_IN_COPYSIGNF:
36317 if (out_mode == SFmode && in_mode == SFmode)
36319 if (out_n == 4 && in_n == 4)
36320 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36321 else if (out_n == 8 && in_n == 8)
36322 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36323 else if (out_n == 16 && in_n == 16)
36324 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36326 break;
36328 case BUILT_IN_FLOOR:
36329 /* The round insn does not trap on denormals. */
36330 if (flag_trapping_math || !TARGET_ROUND)
36331 break;
36333 if (out_mode == DFmode && in_mode == DFmode)
36335 if (out_n == 2 && in_n == 2)
36336 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36337 else if (out_n == 4 && in_n == 4)
36338 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36340 break;
36342 case BUILT_IN_FLOORF:
36343 /* The round insn does not trap on denormals. */
36344 if (flag_trapping_math || !TARGET_ROUND)
36345 break;
36347 if (out_mode == SFmode && in_mode == SFmode)
36349 if (out_n == 4 && in_n == 4)
36350 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36351 else if (out_n == 8 && in_n == 8)
36352 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36354 break;
36356 case BUILT_IN_CEIL:
36357 /* The round insn does not trap on denormals. */
36358 if (flag_trapping_math || !TARGET_ROUND)
36359 break;
36361 if (out_mode == DFmode && in_mode == DFmode)
36363 if (out_n == 2 && in_n == 2)
36364 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36365 else if (out_n == 4 && in_n == 4)
36366 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36368 break;
36370 case BUILT_IN_CEILF:
36371 /* The round insn does not trap on denormals. */
36372 if (flag_trapping_math || !TARGET_ROUND)
36373 break;
36375 if (out_mode == SFmode && in_mode == SFmode)
36377 if (out_n == 4 && in_n == 4)
36378 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36379 else if (out_n == 8 && in_n == 8)
36380 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36382 break;
36384 case BUILT_IN_TRUNC:
36385 /* The round insn does not trap on denormals. */
36386 if (flag_trapping_math || !TARGET_ROUND)
36387 break;
36389 if (out_mode == DFmode && in_mode == DFmode)
36391 if (out_n == 2 && in_n == 2)
36392 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36393 else if (out_n == 4 && in_n == 4)
36394 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36396 break;
36398 case BUILT_IN_TRUNCF:
36399 /* The round insn does not trap on denormals. */
36400 if (flag_trapping_math || !TARGET_ROUND)
36401 break;
36403 if (out_mode == SFmode && in_mode == SFmode)
36405 if (out_n == 4 && in_n == 4)
36406 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36407 else if (out_n == 8 && in_n == 8)
36408 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36410 break;
36412 case BUILT_IN_RINT:
36413 /* The round insn does not trap on denormals. */
36414 if (flag_trapping_math || !TARGET_ROUND)
36415 break;
36417 if (out_mode == DFmode && in_mode == DFmode)
36419 if (out_n == 2 && in_n == 2)
36420 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36421 else if (out_n == 4 && in_n == 4)
36422 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36424 break;
36426 case BUILT_IN_RINTF:
36427 /* The round insn does not trap on denormals. */
36428 if (flag_trapping_math || !TARGET_ROUND)
36429 break;
36431 if (out_mode == SFmode && in_mode == SFmode)
36433 if (out_n == 4 && in_n == 4)
36434 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36435 else if (out_n == 8 && in_n == 8)
36436 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36438 break;
36440 case BUILT_IN_ROUND:
36441 /* The round insn does not trap on denormals. */
36442 if (flag_trapping_math || !TARGET_ROUND)
36443 break;
36445 if (out_mode == DFmode && in_mode == DFmode)
36447 if (out_n == 2 && in_n == 2)
36448 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36449 else if (out_n == 4 && in_n == 4)
36450 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36452 break;
36454 case BUILT_IN_ROUNDF:
36455 /* The round insn does not trap on denormals. */
36456 if (flag_trapping_math || !TARGET_ROUND)
36457 break;
36459 if (out_mode == SFmode && in_mode == SFmode)
36461 if (out_n == 4 && in_n == 4)
36462 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36463 else if (out_n == 8 && in_n == 8)
36464 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36466 break;
36468 case BUILT_IN_FMA:
36469 if (out_mode == DFmode && in_mode == DFmode)
36471 if (out_n == 2 && in_n == 2)
36472 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36473 if (out_n == 4 && in_n == 4)
36474 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36476 break;
36478 case BUILT_IN_FMAF:
36479 if (out_mode == SFmode && in_mode == SFmode)
36481 if (out_n == 4 && in_n == 4)
36482 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36483 if (out_n == 8 && in_n == 8)
36484 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36486 break;
36488 default:
36489 break;
36492 /* Dispatch to a handler for a vectorization library. */
36493 if (ix86_veclib_handler)
36494 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36495 type_in);
36497 return NULL_TREE;
36500 /* Handler for an SVML-style interface to
36501 a library with vectorized intrinsics. */
36503 static tree
36504 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36506 char name[20];
36507 tree fntype, new_fndecl, args;
36508 unsigned arity;
36509 const char *bname;
36510 enum machine_mode el_mode, in_mode;
36511 int n, in_n;
36513 /* The SVML is suitable for unsafe math only. */
36514 if (!flag_unsafe_math_optimizations)
36515 return NULL_TREE;
36517 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36518 n = TYPE_VECTOR_SUBPARTS (type_out);
36519 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36520 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36521 if (el_mode != in_mode
36522 || n != in_n)
36523 return NULL_TREE;
36525 switch (fn)
36527 case BUILT_IN_EXP:
36528 case BUILT_IN_LOG:
36529 case BUILT_IN_LOG10:
36530 case BUILT_IN_POW:
36531 case BUILT_IN_TANH:
36532 case BUILT_IN_TAN:
36533 case BUILT_IN_ATAN:
36534 case BUILT_IN_ATAN2:
36535 case BUILT_IN_ATANH:
36536 case BUILT_IN_CBRT:
36537 case BUILT_IN_SINH:
36538 case BUILT_IN_SIN:
36539 case BUILT_IN_ASINH:
36540 case BUILT_IN_ASIN:
36541 case BUILT_IN_COSH:
36542 case BUILT_IN_COS:
36543 case BUILT_IN_ACOSH:
36544 case BUILT_IN_ACOS:
36545 if (el_mode != DFmode || n != 2)
36546 return NULL_TREE;
36547 break;
36549 case BUILT_IN_EXPF:
36550 case BUILT_IN_LOGF:
36551 case BUILT_IN_LOG10F:
36552 case BUILT_IN_POWF:
36553 case BUILT_IN_TANHF:
36554 case BUILT_IN_TANF:
36555 case BUILT_IN_ATANF:
36556 case BUILT_IN_ATAN2F:
36557 case BUILT_IN_ATANHF:
36558 case BUILT_IN_CBRTF:
36559 case BUILT_IN_SINHF:
36560 case BUILT_IN_SINF:
36561 case BUILT_IN_ASINHF:
36562 case BUILT_IN_ASINF:
36563 case BUILT_IN_COSHF:
36564 case BUILT_IN_COSF:
36565 case BUILT_IN_ACOSHF:
36566 case BUILT_IN_ACOSF:
36567 if (el_mode != SFmode || n != 4)
36568 return NULL_TREE;
36569 break;
36571 default:
36572 return NULL_TREE;
36575 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36577 if (fn == BUILT_IN_LOGF)
36578 strcpy (name, "vmlsLn4");
36579 else if (fn == BUILT_IN_LOG)
36580 strcpy (name, "vmldLn2");
36581 else if (n == 4)
36583 sprintf (name, "vmls%s", bname+10);
36584 name[strlen (name)-1] = '4';
36586 else
36587 sprintf (name, "vmld%s2", bname+10);
36589 /* Convert to uppercase. */
36590 name[4] &= ~0x20;
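/* For illustration: for BUILT_IN_SIN, bname is "__builtin_sin", so
   bname+10 is "sin"; the double-precision branch above produces
   "vmldsin2", and clearing bit 0x20 of name[4] uppercases the first
   letter of the math function, giving "vmldSin2".  For BUILT_IN_SINF
   with n == 4 the result is likewise "vmlsSin4".  */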
36592 arity = 0;
36593 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36594 args;
36595 args = TREE_CHAIN (args))
36596 arity++;
36598 if (arity == 1)
36599 fntype = build_function_type_list (type_out, type_in, NULL);
36600 else
36601 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36603 /* Build a function declaration for the vectorized function. */
36604 new_fndecl = build_decl (BUILTINS_LOCATION,
36605 FUNCTION_DECL, get_identifier (name), fntype);
36606 TREE_PUBLIC (new_fndecl) = 1;
36607 DECL_EXTERNAL (new_fndecl) = 1;
36608 DECL_IS_NOVOPS (new_fndecl) = 1;
36609 TREE_READONLY (new_fndecl) = 1;
36611 return new_fndecl;
36614 /* Handler for an ACML-style interface to
36615 a library with vectorized intrinsics. */
36617 static tree
36618 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36620 char name[20] = "__vr.._";
36621 tree fntype, new_fndecl, args;
36622 unsigned arity;
36623 const char *bname;
36624 enum machine_mode el_mode, in_mode;
36625 int n, in_n;
36627 /* ACML is 64-bit only and suitable for unsafe math only, as it
36628 does not correctly support parts of IEEE (such as denormals)
36629 with the required precision. */
36630 if (!TARGET_64BIT
36631 || !flag_unsafe_math_optimizations)
36632 return NULL_TREE;
36634 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36635 n = TYPE_VECTOR_SUBPARTS (type_out);
36636 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36637 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36638 if (el_mode != in_mode
36639 || n != in_n)
36640 return NULL_TREE;
36642 switch (fn)
36644 case BUILT_IN_SIN:
36645 case BUILT_IN_COS:
36646 case BUILT_IN_EXP:
36647 case BUILT_IN_LOG:
36648 case BUILT_IN_LOG2:
36649 case BUILT_IN_LOG10:
36650 name[4] = 'd';
36651 name[5] = '2';
36652 if (el_mode != DFmode
36653 || n != 2)
36654 return NULL_TREE;
36655 break;
36657 case BUILT_IN_SINF:
36658 case BUILT_IN_COSF:
36659 case BUILT_IN_EXPF:
36660 case BUILT_IN_POWF:
36661 case BUILT_IN_LOGF:
36662 case BUILT_IN_LOG2F:
36663 case BUILT_IN_LOG10F:
36664 name[4] = 's';
36665 name[5] = '4';
36666 if (el_mode != SFmode
36667 || n != 4)
36668 return NULL_TREE;
36669 break;
36671 default:
36672 return NULL_TREE;
36675 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36676 sprintf (name + 7, "%s", bname+10);
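/* For illustration: the "__vr.._" template becomes "__vrd2_" for the
   double-precision cases and "__vrs4_" for the single-precision ones,
   so BUILT_IN_SIN maps to "__vrd2_sin" and BUILT_IN_SINF maps to
   "__vrs4_sinf".  */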
36678 arity = 0;
36679 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36680 args;
36681 args = TREE_CHAIN (args))
36682 arity++;
36684 if (arity == 1)
36685 fntype = build_function_type_list (type_out, type_in, NULL);
36686 else
36687 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36689 /* Build a function declaration for the vectorized function. */
36690 new_fndecl = build_decl (BUILTINS_LOCATION,
36691 FUNCTION_DECL, get_identifier (name), fntype);
36692 TREE_PUBLIC (new_fndecl) = 1;
36693 DECL_EXTERNAL (new_fndecl) = 1;
36694 DECL_IS_NOVOPS (new_fndecl) = 1;
36695 TREE_READONLY (new_fndecl) = 1;
36697 return new_fndecl;
36700 /* Returns a decl of a function that implements gather load with
36701 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36702 Return NULL_TREE if it is not available. */
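/* An added usage note (a sketch, not part of the original comment): the
   vectorizer typically queries this hook for indexed loads such as
       for (i = 0; i < n; i++) a[i] = b[idx[i]];
   and, when a builtin decl is returned, emits calls to it so the loads
   become vgatherdps/vgatherdpd-style instructions instead of a sequence
   of scalar loads.  */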
36704 static tree
36705 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36706 const_tree index_type, int scale)
36708 bool si;
36709 enum ix86_builtins code;
36711 if (! TARGET_AVX2)
36712 return NULL_TREE;
36714 if ((TREE_CODE (index_type) != INTEGER_TYPE
36715 && !POINTER_TYPE_P (index_type))
36716 || (TYPE_MODE (index_type) != SImode
36717 && TYPE_MODE (index_type) != DImode))
36718 return NULL_TREE;
36720 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36721 return NULL_TREE;
36723 /* The v*gather* insns sign-extend the index to pointer mode. */
36724 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36725 && TYPE_UNSIGNED (index_type))
36726 return NULL_TREE;
36728 if (scale <= 0
36729 || scale > 8
36730 || (scale & (scale - 1)) != 0)
36731 return NULL_TREE;
36733 si = TYPE_MODE (index_type) == SImode;
36734 switch (TYPE_MODE (mem_vectype))
36736 case V2DFmode:
36737 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36738 break;
36739 case V4DFmode:
36740 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36741 break;
36742 case V2DImode:
36743 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36744 break;
36745 case V4DImode:
36746 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36747 break;
36748 case V4SFmode:
36749 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36750 break;
36751 case V8SFmode:
36752 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36753 break;
36754 case V4SImode:
36755 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36756 break;
36757 case V8SImode:
36758 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36759 break;
36760 case V8DFmode:
36761 if (TARGET_AVX512F)
36762 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36763 else
36764 return NULL_TREE;
36765 break;
36766 case V8DImode:
36767 if (TARGET_AVX512F)
36768 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36769 else
36770 return NULL_TREE;
36771 break;
36772 case V16SFmode:
36773 if (TARGET_AVX512F)
36774 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36775 else
36776 return NULL_TREE;
36777 break;
36778 case V16SImode:
36779 if (TARGET_AVX512F)
36780 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36781 else
36782 return NULL_TREE;
36783 break;
36784 default:
36785 return NULL_TREE;
36788 return ix86_get_builtin (code);
36791 /* Returns a code for a target-specific builtin that implements
36792 reciprocal of the function, or NULL_TREE if not available. */
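/* Background note (hedged, not from the original comment): the *_NR
   variants refine the hardware reciprocal-square-root estimate with one
   Newton-Raphson step, roughly
       y1 = y0 * (1.5 - 0.5 * x * y0 * y0)
   which is why the replacement below is guarded by the unsafe-math and
   no-trapping-math checks.  */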
36794 static tree
36795 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36796 bool sqrt ATTRIBUTE_UNUSED)
36798 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36799 && flag_finite_math_only && !flag_trapping_math
36800 && flag_unsafe_math_optimizations))
36801 return NULL_TREE;
36803 if (md_fn)
36804 /* Machine dependent builtins. */
36805 switch (fn)
36807 /* Vectorized version of sqrt to rsqrt conversion. */
36808 case IX86_BUILTIN_SQRTPS_NR:
36809 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36811 case IX86_BUILTIN_SQRTPS_NR256:
36812 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36814 default:
36815 return NULL_TREE;
36817 else
36818 /* Normal builtins. */
36819 switch (fn)
36821 /* Sqrt to rsqrt conversion. */
36822 case BUILT_IN_SQRTF:
36823 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36825 default:
36826 return NULL_TREE;
36830 /* Helper for avx_vpermilps256_operand et al. This is also used by
36831 the expansion functions to turn the parallel back into a mask.
36832 The return value is 0 for no match and the imm8+1 for a match. */
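/* Worked example (added for illustration): for V4SFmode the parallel
   (3 2 1 0), i.e. a full element reversal, packs two bits per element
   and yields the imm8 0x1b; the function returns 0x1c so that a
   successful match is always non-zero.  */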
36835 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36837 unsigned i, nelt = GET_MODE_NUNITS (mode);
36838 unsigned mask = 0;
36839 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36841 if (XVECLEN (par, 0) != (int) nelt)
36842 return 0;
36844 /* Validate that all of the elements are constants, and not totally
36845 out of range. Copy the data into an integral array to make the
36846 subsequent checks easier. */
36847 for (i = 0; i < nelt; ++i)
36849 rtx er = XVECEXP (par, 0, i);
36850 unsigned HOST_WIDE_INT ei;
36852 if (!CONST_INT_P (er))
36853 return 0;
36854 ei = INTVAL (er);
36855 if (ei >= nelt)
36856 return 0;
36857 ipar[i] = ei;
36860 switch (mode)
36862 case V8DFmode:
36863 /* In the 512-bit DFmode case, we can only move elements within
36864 a 128-bit lane. First fill the second part of the mask,
36865 then fallthru. */
36866 for (i = 4; i < 6; ++i)
36868 if (ipar[i] < 4 || ipar[i] >= 6)
36869 return 0;
36870 mask |= (ipar[i] - 4) << i;
36872 for (i = 6; i < 8; ++i)
36874 if (ipar[i] < 6)
36875 return 0;
36876 mask |= (ipar[i] - 6) << i;
36878 /* FALLTHRU */
36880 case V4DFmode:
36881 /* In the 256-bit DFmode case, we can only move elements within
36882 a 128-bit lane. */
36883 for (i = 0; i < 2; ++i)
36885 if (ipar[i] >= 2)
36886 return 0;
36887 mask |= ipar[i] << i;
36889 for (i = 2; i < 4; ++i)
36891 if (ipar[i] < 2)
36892 return 0;
36893 mask |= (ipar[i] - 2) << i;
36895 break;
36897 case V16SFmode:
36898 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36899 must mirror the permutation in the lower 256 bits. */
36900 for (i = 0; i < 8; ++i)
36901 if (ipar[i] + 8 != ipar[i + 8])
36902 return 0;
36903 /* FALLTHRU */
36905 case V8SFmode:
36906 /* In the 256-bit SFmode case, we have full freedom of
36907 movement within the low 128-bit lane, but the high 128-bit
36908 lane must mirror exactly the same pattern. */
36909 for (i = 0; i < 4; ++i)
36910 if (ipar[i] + 4 != ipar[i + 4])
36911 return 0;
36912 nelt = 4;
36913 /* FALLTHRU */
36915 case V2DFmode:
36916 case V4SFmode:
36917 /* In the 128-bit case, we have full freedom in the placement of
36918 the elements from the source operand. */
36919 for (i = 0; i < nelt; ++i)
36920 mask |= ipar[i] << (i * (nelt / 2));
36921 break;
36923 default:
36924 gcc_unreachable ();
36927 /* Make sure success has a non-zero value by adding one. */
36928 return mask + 1;
36931 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36932 the expansion functions to turn the parallel back into a mask.
36933 The return value is 0 for no match and the imm8+1 for a match. */
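/* Worked example (added for illustration): for V8SFmode a parallel whose
   low half selects elements 4..7 (the high lane of the first operand)
   and whose high half selects elements 8..11 (the low lane of the second
   operand) reduces to lane numbers 1 and 2, yielding the imm8 0x21; the
   function returns 0x22.  */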
36936 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36938 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36939 unsigned mask = 0;
36940 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36942 if (XVECLEN (par, 0) != (int) nelt)
36943 return 0;
36945 /* Validate that all of the elements are constants, and not totally
36946 out of range. Copy the data into an integral array to make the
36947 subsequent checks easier. */
36948 for (i = 0; i < nelt; ++i)
36950 rtx er = XVECEXP (par, 0, i);
36951 unsigned HOST_WIDE_INT ei;
36953 if (!CONST_INT_P (er))
36954 return 0;
36955 ei = INTVAL (er);
36956 if (ei >= 2 * nelt)
36957 return 0;
36958 ipar[i] = ei;
36961 /* Validate that each half of the permute selects consecutive elements. */
36962 for (i = 0; i < nelt2 - 1; ++i)
36963 if (ipar[i] + 1 != ipar[i + 1])
36964 return 0;
36965 for (i = nelt2; i < nelt - 1; ++i)
36966 if (ipar[i] + 1 != ipar[i + 1])
36967 return 0;
36969 /* Reconstruct the mask. */
36970 for (i = 0; i < 2; ++i)
36972 unsigned e = ipar[i * nelt2];
36973 if (e % nelt2)
36974 return 0;
36975 e /= nelt2;
36976 mask |= e << (i * 4);
36979 /* Make sure success has a non-zero value by adding one. */
36980 return mask + 1;
36983 /* Store OPERAND to the memory after reload is completed. This means
36984 that we can't easily use assign_stack_local. */
36986 ix86_force_to_memory (enum machine_mode mode, rtx operand)
36988 rtx result;
36990 gcc_assert (reload_completed);
36991 if (ix86_using_red_zone ())
36993 result = gen_rtx_MEM (mode,
36994 gen_rtx_PLUS (Pmode,
36995 stack_pointer_rtx,
36996 GEN_INT (-RED_ZONE_SIZE)));
36997 emit_move_insn (result, operand);
36999 else if (TARGET_64BIT)
37001 switch (mode)
37003 case HImode:
37004 case SImode:
37005 operand = gen_lowpart (DImode, operand);
37006 /* FALLTHRU */
37007 case DImode:
37008 emit_insn (
37009 gen_rtx_SET (VOIDmode,
37010 gen_rtx_MEM (DImode,
37011 gen_rtx_PRE_DEC (DImode,
37012 stack_pointer_rtx)),
37013 operand));
37014 break;
37015 default:
37016 gcc_unreachable ();
37018 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37020 else
37022 switch (mode)
37024 case DImode:
37026 rtx operands[2];
37027 split_double_mode (mode, &operand, 1, operands, operands + 1);
37028 emit_insn (
37029 gen_rtx_SET (VOIDmode,
37030 gen_rtx_MEM (SImode,
37031 gen_rtx_PRE_DEC (Pmode,
37032 stack_pointer_rtx)),
37033 operands[1]));
37034 emit_insn (
37035 gen_rtx_SET (VOIDmode,
37036 gen_rtx_MEM (SImode,
37037 gen_rtx_PRE_DEC (Pmode,
37038 stack_pointer_rtx)),
37039 operands[0]));
37041 break;
37042 case HImode:
37043 /* Store HImodes as SImodes. */
37044 operand = gen_lowpart (SImode, operand);
37045 /* FALLTHRU */
37046 case SImode:
37047 emit_insn (
37048 gen_rtx_SET (VOIDmode,
37049 gen_rtx_MEM (GET_MODE (operand),
37050 gen_rtx_PRE_DEC (SImode,
37051 stack_pointer_rtx)),
37052 operand));
37053 break;
37054 default:
37055 gcc_unreachable ();
37057 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37059 return result;
37062 /* Free operand from the memory. */
37063 void
37064 ix86_free_from_memory (enum machine_mode mode)
37066 if (!ix86_using_red_zone ())
37068 int size;
37070 if (mode == DImode || TARGET_64BIT)
37071 size = 8;
37072 else
37073 size = 4;
37074 /* Use LEA to deallocate stack space. In peephole2 it will be converted
37075 to a pop or add instruction if registers are available. */
37076 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
37077 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
37078 GEN_INT (size))));
37082 /* Return a register priority for hard reg REGNO. */
37083 static int
37084 ix86_register_priority (int hard_regno)
37086 /* ebp and r13 as a base always want a displacement, and r12 as a
37087 base always wants an index, so discourage their use in an
37088 address. */
37089 if (hard_regno == R12_REG || hard_regno == R13_REG)
37090 return 0;
37091 if (hard_regno == BP_REG)
37092 return 1;
37093 /* New x86-64 int registers result in bigger code size. Discourage
37094 them. */
37095 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37096 return 2;
37097 /* New x86-64 SSE registers result in bigger code size. Discourage
37098 them. */
37099 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37100 return 2;
37101 /* Usage of AX register results in smaller code. Prefer it. */
37102 if (hard_regno == 0)
37103 return 4;
37104 return 3;
37107 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37109 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37110 QImode must go into class Q_REGS.
37111 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and movdf to
37112 do mem-to-mem moves through integer regs. */
37114 static reg_class_t
37115 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37117 enum machine_mode mode = GET_MODE (x);
37119 /* We're only allowed to return a subclass of CLASS. Many of the
37120 following checks fail for NO_REGS, so eliminate that early. */
37121 if (regclass == NO_REGS)
37122 return NO_REGS;
37124 /* All classes can load zeros. */
37125 if (x == CONST0_RTX (mode))
37126 return regclass;
37128 /* Force constants into memory if we are loading a (nonzero) constant into
37129 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37130 instructions to load from a constant. */
37131 if (CONSTANT_P (x)
37132 && (MAYBE_MMX_CLASS_P (regclass)
37133 || MAYBE_SSE_CLASS_P (regclass)
37134 || MAYBE_MASK_CLASS_P (regclass)))
37135 return NO_REGS;
37137 /* Prefer SSE regs only, if we can use them for math. */
37138 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37139 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37141 /* Floating-point constants need more complex checks. */
37142 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37144 /* General regs can load everything. */
37145 if (reg_class_subset_p (regclass, GENERAL_REGS))
37146 return regclass;
37148 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37149 zero above. We only want to wind up preferring 80387 registers if
37150 we plan on doing computation with them. */
37151 if (TARGET_80387
37152 && standard_80387_constant_p (x) > 0)
37154 /* Limit class to non-sse. */
37155 if (regclass == FLOAT_SSE_REGS)
37156 return FLOAT_REGS;
37157 if (regclass == FP_TOP_SSE_REGS)
37158 return FP_TOP_REG;
37159 if (regclass == FP_SECOND_SSE_REGS)
37160 return FP_SECOND_REG;
37161 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37162 return regclass;
37165 return NO_REGS;
37168 /* Generally when we see PLUS here, it's the function invariant
37169 (plus soft-fp const_int), which can only be computed into general
37170 regs. */
37171 if (GET_CODE (x) == PLUS)
37172 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37174 /* QImode constants are easy to load, but non-constant QImode data
37175 must go into Q_REGS. */
37176 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37178 if (reg_class_subset_p (regclass, Q_REGS))
37179 return regclass;
37180 if (reg_class_subset_p (Q_REGS, regclass))
37181 return Q_REGS;
37182 return NO_REGS;
37185 return regclass;
37188 /* Discourage putting floating-point values in SSE registers unless
37189 SSE math is being used, and likewise for the 387 registers. */
37190 static reg_class_t
37191 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37193 enum machine_mode mode = GET_MODE (x);
37195 /* Restrict the output reload class to the register bank that we are doing
37196 math on. If we would like not to return a subset of CLASS, reject this
37197 alternative: if reload cannot do this, it will still use its choice. */
37198 mode = GET_MODE (x);
37199 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37200 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37202 if (X87_FLOAT_MODE_P (mode))
37204 if (regclass == FP_TOP_SSE_REGS)
37205 return FP_TOP_REG;
37206 else if (regclass == FP_SECOND_SSE_REGS)
37207 return FP_SECOND_REG;
37208 else
37209 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37212 return regclass;
37215 static reg_class_t
37216 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37217 enum machine_mode mode, secondary_reload_info *sri)
37219 /* Double-word spills from general registers to non-offsettable memory
37220 references (zero-extended addresses) require special handling. */
37221 if (TARGET_64BIT
37222 && MEM_P (x)
37223 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37224 && INTEGER_CLASS_P (rclass)
37225 && !offsettable_memref_p (x))
37227 sri->icode = (in_p
37228 ? CODE_FOR_reload_noff_load
37229 : CODE_FOR_reload_noff_store);
37230 /* Add the cost of moving address to a temporary. */
37231 sri->extra_cost = 1;
37233 return NO_REGS;
37236 /* QImode spills from non-QI registers require an
37237 intermediate register on 32-bit targets. */
37238 if (mode == QImode
37239 && (MAYBE_MASK_CLASS_P (rclass)
37240 || (!TARGET_64BIT && !in_p
37241 && INTEGER_CLASS_P (rclass)
37242 && MAYBE_NON_Q_CLASS_P (rclass))))
37244 int regno;
37246 if (REG_P (x))
37247 regno = REGNO (x);
37248 else
37249 regno = -1;
37251 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37252 regno = true_regnum (x);
37254 /* Return Q_REGS if the operand is in memory. */
37255 if (regno == -1)
37256 return Q_REGS;
37259 /* This condition handles a corner case where an expression involving
37260 pointers gets vectorized. We're trying to use the address of a
37261 stack slot as a vector initializer.
37263 (set (reg:V2DI 74 [ vect_cst_.2 ])
37264 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37266 Eventually frame gets turned into sp+offset like this:
37268 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37269 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37270 (const_int 392 [0x188]))))
37272 That later gets turned into:
37274 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37275 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37276 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37278 We'll have the following reload recorded:
37280 Reload 0: reload_in (DI) =
37281 (plus:DI (reg/f:DI 7 sp)
37282 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37283 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37284 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37285 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37286 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37287 reload_reg_rtx: (reg:V2DI 22 xmm1)
37289 Which isn't going to work since SSE instructions can't handle scalar
37290 additions. Returning GENERAL_REGS forces the addition into integer
37291 register and reload can handle subsequent reloads without problems. */
37293 if (in_p && GET_CODE (x) == PLUS
37294 && SSE_CLASS_P (rclass)
37295 && SCALAR_INT_MODE_P (mode))
37296 return GENERAL_REGS;
37298 return NO_REGS;
37301 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37303 static bool
37304 ix86_class_likely_spilled_p (reg_class_t rclass)
37306 switch (rclass)
37308 case AREG:
37309 case DREG:
37310 case CREG:
37311 case BREG:
37312 case AD_REGS:
37313 case SIREG:
37314 case DIREG:
37315 case SSE_FIRST_REG:
37316 case FP_TOP_REG:
37317 case FP_SECOND_REG:
37318 return true;
37320 default:
37321 break;
37324 return false;
37327 /* If we are copying between general and FP registers, we need a memory
37328 location. The same is true for SSE and MMX registers.
37330 To optimize register_move_cost performance, allow an inline variant.
37332 The macro can't work reliably when one of the CLASSES is a class containing
37333 registers from multiple units (SSE, MMX, integer). We avoid this by never
37334 combining those units in a single alternative in the machine description.
37335 Ensure that this constraint holds to avoid unexpected surprises.
37337 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37338 enforce these sanity checks. */
37340 static inline bool
37341 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37342 enum machine_mode mode, int strict)
37344 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37345 return false;
37346 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37347 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37348 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37349 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37350 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37351 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37353 gcc_assert (!strict || lra_in_progress);
37354 return true;
37357 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37358 return true;
37360 /* ??? This is a lie. We do have moves between mmx/general, and for
37361 mmx/sse2. But by saying we need secondary memory we discourage the
37362 register allocator from using the mmx registers unless needed. */
37363 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37364 return true;
37366 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37368 /* SSE1 doesn't have any direct moves from other classes. */
37369 if (!TARGET_SSE2)
37370 return true;
37372 /* If the target says that inter-unit moves are more expensive
37373 than moving through memory, then don't generate them. */
37374 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37375 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37376 return true;
37378 /* Between SSE and general, we have moves no larger than word size. */
37379 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37380 return true;
37383 return false;
37386 bool
37387 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37388 enum machine_mode mode, int strict)
37390 return inline_secondary_memory_needed (class1, class2, mode, strict);
37393 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37395 On the 80386, this is the size of MODE in words,
37396 except in the FP regs, where a single reg is always enough. */
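/* For example: XFmode needs 3 registers in the integer classes on ia32
   (12 bytes in 4-byte words) but only 2 on x86-64, while in the FP and
   SSE classes a scalar mode needs a single register and a complex mode
   needs two.  */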
37398 static unsigned char
37399 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37401 if (MAYBE_INTEGER_CLASS_P (rclass))
37403 if (mode == XFmode)
37404 return (TARGET_64BIT ? 2 : 3);
37405 else if (mode == XCmode)
37406 return (TARGET_64BIT ? 4 : 6);
37407 else
37408 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37410 else
37412 if (COMPLEX_MODE_P (mode))
37413 return 2;
37414 else
37415 return 1;
37419 /* Return true if the registers in CLASS cannot represent the change from
37420 modes FROM to TO. */
37422 bool
37423 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37424 enum reg_class regclass)
37426 if (from == to)
37427 return false;
37429 /* x87 registers can't do subreg at all, as all values are reformatted
37430 to extended precision. */
37431 if (MAYBE_FLOAT_CLASS_P (regclass))
37432 return true;
37434 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37436 /* Vector registers do not support QI or HImode loads. If we don't
37437 disallow a change to these modes, reload will assume it's ok to
37438 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37439 the vec_dupv4hi pattern. */
37440 if (GET_MODE_SIZE (from) < 4)
37441 return true;
37443 /* Vector registers do not support subreg with nonzero offsets, which
37444 are otherwise valid for integer registers. Since we can't see
37445 whether we have a nonzero offset from here, prohibit all
37446 nonparadoxical subregs changing size. */
37447 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37448 return true;
37451 return false;
37454 /* Return the cost of moving data of mode M between a
37455 register and memory. A value of 2 is the default; this cost is
37456 relative to those in `REGISTER_MOVE_COST'.
37458 This function is used extensively by register_move_cost that is used to
37459 build tables at startup. Make it inline in this case.
37460 When IN is 2, return the maximum of the in and out move costs.
37462 If moving between registers and memory is more expensive than
37463 between two registers, you should define this macro to express the
37464 relative cost.
37466 Also model the increased cost of moving QImode registers in
37467 non-Q_REGS classes. */
37469 static inline int
37470 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37471 int in)
37473 int cost;
37474 if (FLOAT_CLASS_P (regclass))
37476 int index;
37477 switch (mode)
37479 case SFmode:
37480 index = 0;
37481 break;
37482 case DFmode:
37483 index = 1;
37484 break;
37485 case XFmode:
37486 index = 2;
37487 break;
37488 default:
37489 return 100;
37491 if (in == 2)
37492 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37493 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37495 if (SSE_CLASS_P (regclass))
37497 int index;
37498 switch (GET_MODE_SIZE (mode))
37500 case 4:
37501 index = 0;
37502 break;
37503 case 8:
37504 index = 1;
37505 break;
37506 case 16:
37507 index = 2;
37508 break;
37509 default:
37510 return 100;
37512 if (in == 2)
37513 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37514 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37516 if (MMX_CLASS_P (regclass))
37518 int index;
37519 switch (GET_MODE_SIZE (mode))
37521 case 4:
37522 index = 0;
37523 break;
37524 case 8:
37525 index = 1;
37526 break;
37527 default:
37528 return 100;
37530 if (in == 2)
37531 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37532 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37534 switch (GET_MODE_SIZE (mode))
37536 case 1:
37537 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37539 if (!in)
37540 return ix86_cost->int_store[0];
37541 if (TARGET_PARTIAL_REG_DEPENDENCY
37542 && optimize_function_for_speed_p (cfun))
37543 cost = ix86_cost->movzbl_load;
37544 else
37545 cost = ix86_cost->int_load[0];
37546 if (in == 2)
37547 return MAX (cost, ix86_cost->int_store[0]);
37548 return cost;
37550 else
37552 if (in == 2)
37553 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37554 if (in)
37555 return ix86_cost->movzbl_load;
37556 else
37557 return ix86_cost->int_store[0] + 4;
37559 break;
37560 case 2:
37561 if (in == 2)
37562 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37563 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37564 default:
37565 /* Compute the number of word-sized moves needed. TFmode is moved as XFmode. */
37566 if (mode == TFmode)
37567 mode = XFmode;
37568 if (in == 2)
37569 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37570 else if (in)
37571 cost = ix86_cost->int_load[2];
37572 else
37573 cost = ix86_cost->int_store[2];
37574 return (cost * (((int) GET_MODE_SIZE (mode)
37575 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37579 static int
37580 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37581 bool in)
37583 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37587 /* Return the cost of moving data from a register in class CLASS1 to
37588 one in class CLASS2.
37590 It is not required that the cost always equal 2 when FROM is the same as TO;
37591 on some machines it is expensive to move between registers if they are not
37592 general registers. */
37594 static int
37595 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37596 reg_class_t class2_i)
37598 enum reg_class class1 = (enum reg_class) class1_i;
37599 enum reg_class class2 = (enum reg_class) class2_i;
37601 /* In case we require secondary memory, compute the cost of the store
37602 followed by a load. In order to avoid bad register allocation choices,
37603 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37605 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37607 int cost = 1;
37609 cost += inline_memory_move_cost (mode, class1, 2);
37610 cost += inline_memory_move_cost (mode, class2, 2);
37612 /* In the case of copying from a general purpose register we may emit
37613 multiple stores followed by a single load, causing a memory size
37614 mismatch stall. Count this as an arbitrarily high cost of 20. */
37615 if (targetm.class_max_nregs (class1, mode)
37616 > targetm.class_max_nregs (class2, mode))
37617 cost += 20;
37619 /* In the case of FP/MMX moves, the registers actually overlap, and we
37620 have to switch modes in order to treat them differently. */
37621 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37622 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37623 cost += 20;
37625 return cost;
37628 /* Moves between SSE/MMX and integer unit are expensive. */
37629 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37630 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37632 /* ??? By keeping the returned value relatively high, we limit the number
37633 of moves between integer and MMX/SSE registers for all targets.
37634 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
37635 where integer modes in MMX/SSE registers are not tieable
37636 because of missing QImode and HImode moves to, from or between
37637 MMX/SSE registers. */
37638 return MAX (8, ix86_cost->mmxsse_to_integer);
37640 if (MAYBE_FLOAT_CLASS_P (class1))
37641 return ix86_cost->fp_move;
37642 if (MAYBE_SSE_CLASS_P (class1))
37643 return ix86_cost->sse_move;
37644 if (MAYBE_MMX_CLASS_P (class1))
37645 return ix86_cost->mmx_move;
37646 return 2;
37649 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37650 MODE. */
37652 bool
37653 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37655 /* The flags register, and only the flags register, can hold CCmode values. */
37656 if (CC_REGNO_P (regno))
37657 return GET_MODE_CLASS (mode) == MODE_CC;
37658 if (GET_MODE_CLASS (mode) == MODE_CC
37659 || GET_MODE_CLASS (mode) == MODE_RANDOM
37660 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37661 return false;
37662 if (STACK_REGNO_P (regno))
37663 return VALID_FP_MODE_P (mode);
37664 if (MASK_REGNO_P (regno))
37665 return VALID_MASK_REG_MODE (mode);
37666 if (SSE_REGNO_P (regno))
37668 /* We implement the move patterns for all vector modes into and
37669 out of SSE registers, even when no operation instructions
37670 are available. */
37672 /* For AVX-512 we allow, regardless of regno:
37673 - XI mode
37674 - any of the 512-bit wide vector modes
37675 - any scalar mode. */
37676 if (TARGET_AVX512F
37677 && (mode == XImode
37678 || VALID_AVX512F_REG_MODE (mode)
37679 || VALID_AVX512F_SCALAR_MODE (mode)))
37680 return true;
37682 /* xmm16-xmm31 are only available for AVX-512. */
37683 if (EXT_REX_SSE_REGNO_P (regno))
37684 return false;
37686 /* OImode and AVX modes are available only when AVX is enabled. */
37687 return ((TARGET_AVX
37688 && VALID_AVX256_REG_OR_OI_MODE (mode))
37689 || VALID_SSE_REG_MODE (mode)
37690 || VALID_SSE2_REG_MODE (mode)
37691 || VALID_MMX_REG_MODE (mode)
37692 || VALID_MMX_REG_MODE_3DNOW (mode));
37694 if (MMX_REGNO_P (regno))
37696 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37697 so if the register is available at all, then we can move data of
37698 the given mode into or out of it. */
37699 return (VALID_MMX_REG_MODE (mode)
37700 || VALID_MMX_REG_MODE_3DNOW (mode));
37703 if (mode == QImode)
37705 /* Take care with QImode values: they can live in non-QI regs,
37706 but then they do cause partial register stalls. */
37707 if (ANY_QI_REGNO_P (regno))
37708 return true;
37709 if (!TARGET_PARTIAL_REG_STALL)
37710 return true;
37711 /* LRA checks if the hard register is OK for the given mode.
37712 QImode values can live in non-QI regs, so we allow all
37713 registers here. */
37714 if (lra_in_progress)
37715 return true;
37716 return !can_create_pseudo_p ();
37718 /* We handle both integer and floats in the general purpose registers. */
37719 else if (VALID_INT_MODE_P (mode))
37720 return true;
37721 else if (VALID_FP_MODE_P (mode))
37722 return true;
37723 else if (VALID_DFP_MODE_P (mode))
37724 return true;
37725 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37726 on to use that value in smaller contexts, this can easily force a
37727 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37728 supporting DImode, allow it. */
37729 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37730 return true;
37732 return false;
37735 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37736 tieable integer mode. */
37738 static bool
37739 ix86_tieable_integer_mode_p (enum machine_mode mode)
37741 switch (mode)
37743 case HImode:
37744 case SImode:
37745 return true;
37747 case QImode:
37748 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37750 case DImode:
37751 return TARGET_64BIT;
37753 default:
37754 return false;
37758 /* Return true if MODE1 is accessible in a register that can hold MODE2
37759 without copying. That is, all register classes that can hold MODE2
37760 can also hold MODE1. */
37762 bool
37763 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37765 if (mode1 == mode2)
37766 return true;
37768 if (ix86_tieable_integer_mode_p (mode1)
37769 && ix86_tieable_integer_mode_p (mode2))
37770 return true;
37772 /* MODE2 being XFmode implies fp stack or general regs, which means we
37773 can tie any smaller floating point modes to it. Note that we do not
37774 tie this with TFmode. */
37775 if (mode2 == XFmode)
37776 return mode1 == SFmode || mode1 == DFmode;
37778 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37779 that we can tie it with SFmode. */
37780 if (mode2 == DFmode)
37781 return mode1 == SFmode;
37783 /* If MODE2 is only appropriate for an SSE register, then tie with
37784 any other mode acceptable to SSE registers. */
37785 if (GET_MODE_SIZE (mode2) == 32
37786 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37787 return (GET_MODE_SIZE (mode1) == 32
37788 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37789 if (GET_MODE_SIZE (mode2) == 16
37790 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37791 return (GET_MODE_SIZE (mode1) == 16
37792 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37794 /* If MODE2 is appropriate for an MMX register, then tie
37795 with any other mode acceptable to MMX registers. */
37796 if (GET_MODE_SIZE (mode2) == 8
37797 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37798 return (GET_MODE_SIZE (mode1) == 8
37799 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37801 return false;
37804 /* Return the cost of moving between two registers of mode MODE. */
37806 static int
37807 ix86_set_reg_reg_cost (enum machine_mode mode)
37809 unsigned int units = UNITS_PER_WORD;
37811 switch (GET_MODE_CLASS (mode))
37813 default:
37814 break;
37816 case MODE_CC:
37817 units = GET_MODE_SIZE (CCmode);
37818 break;
37820 case MODE_FLOAT:
37821 if ((TARGET_SSE && mode == TFmode)
37822 || (TARGET_80387 && mode == XFmode)
37823 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37824 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37825 units = GET_MODE_SIZE (mode);
37826 break;
37828 case MODE_COMPLEX_FLOAT:
37829 if ((TARGET_SSE && mode == TCmode)
37830 || (TARGET_80387 && mode == XCmode)
37831 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37832 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37833 units = GET_MODE_SIZE (mode);
37834 break;
37836 case MODE_VECTOR_INT:
37837 case MODE_VECTOR_FLOAT:
37838 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37839 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37840 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37841 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37842 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37843 units = GET_MODE_SIZE (mode);
37846 /* Return the cost of moving between two registers of mode MODE,
37847 assuming that the move will be in pieces of at most UNITS bytes. */
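/* Added example (illustrative): with the defaults above, a DImode
   register-to-register move on a 32-bit target is done in two word-sized
   pieces and costs COSTS_N_INSNS (2), while on a 64-bit target it is a
   single piece costing COSTS_N_INSNS (1).  */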
37848 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37851 /* Compute a (partial) cost for rtx X. Return true if the complete
37852 cost has been computed, and false if subexpressions should be
37853 scanned. In either case, *TOTAL contains the cost result. */
37855 static bool
37856 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37857 bool speed)
37859 rtx mask;
37860 enum rtx_code code = (enum rtx_code) code_i;
37861 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37862 enum machine_mode mode = GET_MODE (x);
37863 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37865 switch (code)
37867 case SET:
37868 if (register_operand (SET_DEST (x), VOIDmode)
37869 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37871 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37872 return true;
37874 return false;
37876 case CONST_INT:
37877 case CONST:
37878 case LABEL_REF:
37879 case SYMBOL_REF:
37880 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37881 *total = 3;
37882 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37883 *total = 2;
37884 else if (flag_pic && SYMBOLIC_CONST (x)
37885 && (!TARGET_64BIT
37886 || (GET_CODE (x) != LABEL_REF
37887 && (GET_CODE (x) != SYMBOL_REF
37888 || !SYMBOL_REF_LOCAL_P (x)))))
37889 *total = 1;
37890 else
37891 *total = 0;
37892 return true;
37894 case CONST_DOUBLE:
37895 if (mode == VOIDmode)
37897 *total = 0;
37898 return true;
37900 switch (standard_80387_constant_p (x))
37902 case 1: /* 0.0 */
37903 *total = 1;
37904 return true;
37905 default: /* Other constants */
37906 *total = 2;
37907 return true;
37908 case 0:
37909 case -1:
37910 break;
37912 if (SSE_FLOAT_MODE_P (mode))
37914 case CONST_VECTOR:
37915 switch (standard_sse_constant_p (x))
37917 case 0:
37918 break;
37919 case 1: /* 0: xor eliminates false dependency */
37920 *total = 0;
37921 return true;
37922 default: /* -1: cmp contains false dependency */
37923 *total = 1;
37924 return true;
37927 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37928 it'll probably end up. Add a penalty for size. */
37929 *total = (COSTS_N_INSNS (1)
37930 + (flag_pic != 0 && !TARGET_64BIT)
37931 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37932 return true;
37934 case ZERO_EXTEND:
37935 /* The zero extension is often completely free on x86_64, so make
37936 it as cheap as possible. */
37937 if (TARGET_64BIT && mode == DImode
37938 && GET_MODE (XEXP (x, 0)) == SImode)
37939 *total = 1;
37940 else if (TARGET_ZERO_EXTEND_WITH_AND)
37941 *total = cost->add;
37942 else
37943 *total = cost->movzx;
37944 return false;
37946 case SIGN_EXTEND:
37947 *total = cost->movsx;
37948 return false;
37950 case ASHIFT:
37951 if (SCALAR_INT_MODE_P (mode)
37952 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37953 && CONST_INT_P (XEXP (x, 1)))
37955 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37956 if (value == 1)
37958 *total = cost->add;
37959 return false;
37961 if ((value == 2 || value == 3)
37962 && cost->lea <= cost->shift_const)
37964 *total = cost->lea;
37965 return false;
37968 /* FALLTHRU */
37970 case ROTATE:
37971 case ASHIFTRT:
37972 case LSHIFTRT:
37973 case ROTATERT:
37974 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37976 /* ??? Should be SSE vector operation cost. */
37977 /* At least for published AMD latencies, this really is the same
37978 as the latency for a simple fpu operation like fabs. */
37979 /* V*QImode is emulated with 1-11 insns. */
37980 if (mode == V16QImode || mode == V32QImode)
37982 int count = 11;
37983 if (TARGET_XOP && mode == V16QImode)
37985 /* For XOP we use vpshab, which requires a broadcast of the
37986 value to the variable shift insn. For constants this
37987 means a V16QImode const in mem; even when we can perform the
37988 shift with one insn, set the cost to prefer paddb. */
37989 if (CONSTANT_P (XEXP (x, 1)))
37991 *total = (cost->fabs
37992 + rtx_cost (XEXP (x, 0), code, 0, speed)
37993 + (speed ? 2 : COSTS_N_BYTES (16)));
37994 return true;
37996 count = 3;
37998 else if (TARGET_SSSE3)
37999 count = 7;
38000 *total = cost->fabs * count;
38002 else
38003 *total = cost->fabs;
38005 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38007 if (CONST_INT_P (XEXP (x, 1)))
38009 if (INTVAL (XEXP (x, 1)) > 32)
38010 *total = cost->shift_const + COSTS_N_INSNS (2);
38011 else
38012 *total = cost->shift_const * 2;
38014 else
38016 if (GET_CODE (XEXP (x, 1)) == AND)
38017 *total = cost->shift_var * 2;
38018 else
38019 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38022 else
38024 if (CONST_INT_P (XEXP (x, 1)))
38025 *total = cost->shift_const;
38026 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38027 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38029 /* Return the cost after shift-and truncation. */
38030 *total = cost->shift_var;
38031 return true;
38033 else
38034 *total = cost->shift_var;
38036 return false;
38038 case FMA:
38040 rtx sub;
38042 gcc_assert (FLOAT_MODE_P (mode));
38043 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38045 /* ??? SSE scalar/vector cost should be used here. */
38046 /* ??? Bald assumption that fma has the same cost as fmul. */
38047 *total = cost->fmul;
38048 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38050 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38051 sub = XEXP (x, 0);
38052 if (GET_CODE (sub) == NEG)
38053 sub = XEXP (sub, 0);
38054 *total += rtx_cost (sub, FMA, 0, speed);
38056 sub = XEXP (x, 2);
38057 if (GET_CODE (sub) == NEG)
38058 sub = XEXP (sub, 0);
38059 *total += rtx_cost (sub, FMA, 2, speed);
38060 return true;
38063 case MULT:
38064 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38066 /* ??? SSE scalar cost should be used here. */
38067 *total = cost->fmul;
38068 return false;
38070 else if (X87_FLOAT_MODE_P (mode))
38072 *total = cost->fmul;
38073 return false;
38075 else if (FLOAT_MODE_P (mode))
38077 /* ??? SSE vector cost should be used here. */
38078 *total = cost->fmul;
38079 return false;
38081 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38083 /* V*QImode is emulated with 7-13 insns. */
38084 if (mode == V16QImode || mode == V32QImode)
38086 int extra = 11;
38087 if (TARGET_XOP && mode == V16QImode)
38088 extra = 5;
38089 else if (TARGET_SSSE3)
38090 extra = 6;
38091 *total = cost->fmul * 2 + cost->fabs * extra;
38093 /* V*DImode is emulated with 5-8 insns. */
38094 else if (mode == V2DImode || mode == V4DImode)
38096 if (TARGET_XOP && mode == V2DImode)
38097 *total = cost->fmul * 2 + cost->fabs * 3;
38098 else
38099 *total = cost->fmul * 3 + cost->fabs * 5;
38101 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38102 insns, including two PMULUDQ. */
38103 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38104 *total = cost->fmul * 2 + cost->fabs * 5;
38105 else
38106 *total = cost->fmul;
38107 return false;
38109 else
38111 rtx op0 = XEXP (x, 0);
38112 rtx op1 = XEXP (x, 1);
38113 int nbits;
38114 if (CONST_INT_P (XEXP (x, 1)))
38116 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38117 for (nbits = 0; value != 0; value &= value - 1)
38118 nbits++;
38120 else
38121 /* This is arbitrary. */
38122 nbits = 7;
38124 /* Compute costs correctly for widening multiplication. */
38125 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38126 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38127 == GET_MODE_SIZE (mode))
38129 int is_mulwiden = 0;
38130 enum machine_mode inner_mode = GET_MODE (op0);
38132 if (GET_CODE (op0) == GET_CODE (op1))
38133 is_mulwiden = 1, op1 = XEXP (op1, 0);
38134 else if (CONST_INT_P (op1))
38136 if (GET_CODE (op0) == SIGN_EXTEND)
38137 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38138 == INTVAL (op1);
38139 else
38140 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38143 if (is_mulwiden)
38144 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38147 *total = (cost->mult_init[MODE_INDEX (mode)]
38148 + nbits * cost->mult_bit
38149 + rtx_cost (op0, outer_code, opno, speed)
38150 + rtx_cost (op1, outer_code, opno, speed));
38152 return true;
38155 case DIV:
38156 case UDIV:
38157 case MOD:
38158 case UMOD:
38159 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38160 /* ??? SSE cost should be used here. */
38161 *total = cost->fdiv;
38162 else if (X87_FLOAT_MODE_P (mode))
38163 *total = cost->fdiv;
38164 else if (FLOAT_MODE_P (mode))
38165 /* ??? SSE vector cost should be used here. */
38166 *total = cost->fdiv;
38167 else
38168 *total = cost->divide[MODE_INDEX (mode)];
38169 return false;
38171 case PLUS:
38172 if (GET_MODE_CLASS (mode) == MODE_INT
38173 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38175 if (GET_CODE (XEXP (x, 0)) == PLUS
38176 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38177 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38178 && CONSTANT_P (XEXP (x, 1)))
38180 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38181 if (val == 2 || val == 4 || val == 8)
38183 *total = cost->lea;
38184 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38185 outer_code, opno, speed);
38186 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38187 outer_code, opno, speed);
38188 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38189 return true;
38192 else if (GET_CODE (XEXP (x, 0)) == MULT
38193 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38195 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38196 if (val == 2 || val == 4 || val == 8)
38198 *total = cost->lea;
38199 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38200 outer_code, opno, speed);
38201 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38202 return true;
38205 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38207 *total = cost->lea;
38208 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38209 outer_code, opno, speed);
38210 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38211 outer_code, opno, speed);
38212 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38213 return true;
38216 /* FALLTHRU */
38218 case MINUS:
38219 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38221 /* ??? SSE cost should be used here. */
38222 *total = cost->fadd;
38223 return false;
38225 else if (X87_FLOAT_MODE_P (mode))
38227 *total = cost->fadd;
38228 return false;
38230 else if (FLOAT_MODE_P (mode))
38232 /* ??? SSE vector cost should be used here. */
38233 *total = cost->fadd;
38234 return false;
38236 /* FALLTHRU */
38238 case AND:
38239 case IOR:
38240 case XOR:
38241 if (GET_MODE_CLASS (mode) == MODE_INT
38242 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38244 *total = (cost->add * 2
38245 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38246 << (GET_MODE (XEXP (x, 0)) != DImode))
38247 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38248 << (GET_MODE (XEXP (x, 1)) != DImode)));
38249 return true;
38251 /* FALLTHRU */
38253 case NEG:
38254 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38256 /* ??? SSE cost should be used here. */
38257 *total = cost->fchs;
38258 return false;
38260 else if (X87_FLOAT_MODE_P (mode))
38262 *total = cost->fchs;
38263 return false;
38265 else if (FLOAT_MODE_P (mode))
38267 /* ??? SSE vector cost should be used here. */
38268 *total = cost->fchs;
38269 return false;
38271 /* FALLTHRU */
38273 case NOT:
38274 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38276 /* ??? Should be SSE vector operation cost. */
38277 /* At least for published AMD latencies, this really is the same
38278 as the latency for a simple fpu operation like fabs. */
38279 *total = cost->fabs;
38281 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38282 *total = cost->add * 2;
38283 else
38284 *total = cost->add;
38285 return false;
38287 case COMPARE:
38288 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38289 && XEXP (XEXP (x, 0), 1) == const1_rtx
38290 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38291 && XEXP (x, 1) == const0_rtx)
38293 /* This kind of construct is implemented using test[bwl].
38294 Treat it as if we had an AND. */
38295 *total = (cost->add
38296 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38297 + rtx_cost (const1_rtx, outer_code, opno, speed));
38298 return true;
38300 return false;
38302 case FLOAT_EXTEND:
38303 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38304 *total = 0;
38305 return false;
38307 case ABS:
38308 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38309 /* ??? SSE cost should be used here. */
38310 *total = cost->fabs;
38311 else if (X87_FLOAT_MODE_P (mode))
38312 *total = cost->fabs;
38313 else if (FLOAT_MODE_P (mode))
38314 /* ??? SSE vector cost should be used here. */
38315 *total = cost->fabs;
38316 return false;
38318 case SQRT:
38319 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38320 /* ??? SSE cost should be used here. */
38321 *total = cost->fsqrt;
38322 else if (X87_FLOAT_MODE_P (mode))
38323 *total = cost->fsqrt;
38324 else if (FLOAT_MODE_P (mode))
38325 /* ??? SSE vector cost should be used here. */
38326 *total = cost->fsqrt;
38327 return false;
38329 case UNSPEC:
38330 if (XINT (x, 1) == UNSPEC_TP)
38331 *total = 0;
38332 return false;
38334 case VEC_SELECT:
38335 case VEC_CONCAT:
38336 case VEC_DUPLICATE:
38337 /* ??? Assume all of these vector manipulation patterns are
38338 recognizable, in which case they all pretty much have the
38339 same cost. */
38340 *total = cost->fabs;
38341 return true;
38342 case VEC_MERGE:
38343 mask = XEXP (x, 2);
38344 /* This is a masked instruction; assume the same cost
38345 as the nonmasked variant. */
38346 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38347 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38348 else
38349 *total = cost->fabs;
38350 return true;
38352 default:
38353 return false;
38357 #if TARGET_MACHO
38359 static int current_machopic_label_num;
38361 /* Given a symbol name and its associated stub, write out the
38362 definition of the stub. */
38364 void
38365 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38367 unsigned int length;
38368 char *binder_name, *symbol_name, lazy_ptr_name[32];
38369 int label = ++current_machopic_label_num;
38371 /* For 64-bit we shouldn't get here. */
38372 gcc_assert (!TARGET_64BIT);
38374 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38375 symb = targetm.strip_name_encoding (symb);
38377 length = strlen (stub);
38378 binder_name = XALLOCAVEC (char, length + 32);
38379 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38381 length = strlen (symb);
38382 symbol_name = XALLOCAVEC (char, length + 32);
38383 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38385 sprintf (lazy_ptr_name, "L%d$lz", label);
38387 if (MACHOPIC_ATT_STUB)
38388 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38389 else if (MACHOPIC_PURE)
38390 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38391 else
38392 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38394 fprintf (file, "%s:\n", stub);
38395 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38397 if (MACHOPIC_ATT_STUB)
38399 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38401 else if (MACHOPIC_PURE)
38403 /* PIC stub. */
38404 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38405 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38406 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38407 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38408 label, lazy_ptr_name, label);
38409 fprintf (file, "\tjmp\t*%%ecx\n");
38411 else
38412 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38414 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38415 it needs no stub-binding-helper. */
38416 if (MACHOPIC_ATT_STUB)
38417 return;
38419 fprintf (file, "%s:\n", binder_name);
38421 if (MACHOPIC_PURE)
38423 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38424 fprintf (file, "\tpushl\t%%ecx\n");
38426 else
38427 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38429 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38431 /* N.B. Keep the correspondence of these
38432 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38433 old-pic/new-pic/non-pic stubs; altering this will break
38434 compatibility with existing dylibs. */
38435 if (MACHOPIC_PURE)
38437 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38438 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38440 else
38441 /* 16-byte -mdynamic-no-pic stub. */
38442 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38444 fprintf (file, "%s:\n", lazy_ptr_name);
38445 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38446 fprintf (file, ASM_LONG "%s\n", binder_name);
38448 #endif /* TARGET_MACHO */
38450 /* Order the registers for register allocator. */
38452 void
38453 x86_order_regs_for_local_alloc (void)
38455 int pos = 0;
38456 int i;
38458 /* First allocate the local general purpose registers. */
38459 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38460 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38461 reg_alloc_order [pos++] = i;
38463 /* Global general purpose registers. */
38464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38465 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38466 reg_alloc_order [pos++] = i;
38468 /* x87 registers come first in case we are doing FP math
38469 using them. */
38470 if (!TARGET_SSE_MATH)
38471 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38472 reg_alloc_order [pos++] = i;
38474 /* SSE registers. */
38475 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38476 reg_alloc_order [pos++] = i;
38477 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38478 reg_alloc_order [pos++] = i;
38480 /* Extended REX SSE registers. */
38481 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38482 reg_alloc_order [pos++] = i;
38484 /* Mask register. */
38485 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38486 reg_alloc_order [pos++] = i;
38488 /* x87 registers. */
38489 if (TARGET_SSE_MATH)
38490 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38491 reg_alloc_order [pos++] = i;
38493 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38494 reg_alloc_order [pos++] = i;
38496 /* Initialize the rest of the array, as we do not allocate some
38497 registers at all. */
38498 while (pos < FIRST_PSEUDO_REGISTER)
38499 reg_alloc_order [pos++] = 0;
38502 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38503 in struct attribute_spec.handler. */
38504 static tree
38505 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38506 tree args,
38507 int flags ATTRIBUTE_UNUSED,
38508 bool *no_add_attrs)
38510 if (TREE_CODE (*node) != FUNCTION_TYPE
38511 && TREE_CODE (*node) != METHOD_TYPE
38512 && TREE_CODE (*node) != FIELD_DECL
38513 && TREE_CODE (*node) != TYPE_DECL)
38515 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38516 name);
38517 *no_add_attrs = true;
38518 return NULL_TREE;
38520 if (TARGET_64BIT)
38522 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38523 name);
38524 *no_add_attrs = true;
38525 return NULL_TREE;
38527 if (is_attribute_p ("callee_pop_aggregate_return", name))
38529 tree cst;
38531 cst = TREE_VALUE (args);
38532 if (TREE_CODE (cst) != INTEGER_CST)
38534 warning (OPT_Wattributes,
38535 "%qE attribute requires an integer constant argument",
38536 name);
38537 *no_add_attrs = true;
38539 else if (compare_tree_int (cst, 0) != 0
38540 && compare_tree_int (cst, 1) != 0)
38542 warning (OPT_Wattributes,
38543 "argument to %qE attribute is neither zero, nor one",
38544 name);
38545 *no_add_attrs = true;
38548 return NULL_TREE;
38551 return NULL_TREE;
38554 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38555 struct attribute_spec.handler. */
38556 static tree
38557 ix86_handle_abi_attribute (tree *node, tree name,
38558 tree args ATTRIBUTE_UNUSED,
38559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38561 if (TREE_CODE (*node) != FUNCTION_TYPE
38562 && TREE_CODE (*node) != METHOD_TYPE
38563 && TREE_CODE (*node) != FIELD_DECL
38564 && TREE_CODE (*node) != TYPE_DECL)
38566 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38567 name);
38568 *no_add_attrs = true;
38569 return NULL_TREE;
38572 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
38573 if (is_attribute_p ("ms_abi", name))
38575 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38577 error ("ms_abi and sysv_abi attributes are not compatible");
38580 return NULL_TREE;
38582 else if (is_attribute_p ("sysv_abi", name))
38584 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38586 error ("ms_abi and sysv_abi attributes are not compatible");
38589 return NULL_TREE;
38592 return NULL_TREE;
38595 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38596 struct attribute_spec.handler. */
38597 static tree
38598 ix86_handle_struct_attribute (tree *node, tree name,
38599 tree args ATTRIBUTE_UNUSED,
38600 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38602 tree *type = NULL;
38603 if (DECL_P (*node))
38605 if (TREE_CODE (*node) == TYPE_DECL)
38606 type = &TREE_TYPE (*node);
38608 else
38609 type = node;
38611 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38613 warning (OPT_Wattributes, "%qE attribute ignored",
38614 name);
38615 *no_add_attrs = true;
38618 else if ((is_attribute_p ("ms_struct", name)
38619 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38620 || ((is_attribute_p ("gcc_struct", name)
38621 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38623 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38624 name);
38625 *no_add_attrs = true;
38628 return NULL_TREE;
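/* Handle an attribute that is only valid on function declarations;
   arguments as in struct attribute_spec.handler.  */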
38631 static tree
38632 ix86_handle_fndecl_attribute (tree *node, tree name,
38633 tree args ATTRIBUTE_UNUSED,
38634 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38636 if (TREE_CODE (*node) != FUNCTION_DECL)
38638 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38639 name);
38640 *no_add_attrs = true;
38642 return NULL_TREE;
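/* Return true if RECORD_TYPE should be laid out following the MS
   bitfield rules: TARGET_MS_BITFIELD_LAYOUT is enabled and the type
   does not carry the gcc_struct attribute, or the type carries the
   ms_struct attribute.  */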
38645 static bool
38646 ix86_ms_bitfield_layout_p (const_tree record_type)
38648 return ((TARGET_MS_BITFIELD_LAYOUT
38649 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38650 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38653 /* Returns an expression indicating where the this parameter is
38654 located on entry to the FUNCTION. */
38656 static rtx
38657 x86_this_parameter (tree function)
38659 tree type = TREE_TYPE (function);
38660 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38661 int nregs;
38663 if (TARGET_64BIT)
38665 const int *parm_regs;
38667 if (ix86_function_type_abi (type) == MS_ABI)
38668 parm_regs = x86_64_ms_abi_int_parameter_registers;
38669 else
38670 parm_regs = x86_64_int_parameter_registers;
38671 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38674 nregs = ix86_function_regparm (type, function);
38676 if (nregs > 0 && !stdarg_p (type))
38678 int regno;
38679 unsigned int ccvt = ix86_get_callcvt (type);
38681 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38682 regno = aggr ? DX_REG : CX_REG;
38683 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38685 regno = CX_REG;
38686 if (aggr)
38687 return gen_rtx_MEM (SImode,
38688 plus_constant (Pmode, stack_pointer_rtx, 4));
38690 else
38692 regno = AX_REG;
38693 if (aggr)
38695 regno = DX_REG;
38696 if (nregs == 1)
38697 return gen_rtx_MEM (SImode,
38698 plus_constant (Pmode,
38699 stack_pointer_rtx, 4));
38702 return gen_rtx_REG (SImode, regno);
38705 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38706 aggr ? 8 : 4));
38709 /* Determine whether x86_output_mi_thunk can succeed. */
38711 static bool
38712 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38713 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38714 HOST_WIDE_INT vcall_offset, const_tree function)
38716 /* 64-bit can handle anything. */
38717 if (TARGET_64BIT)
38718 return true;
38720 /* For 32-bit, everything's fine if we have one free register. */
38721 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38722 return true;
38724 /* Need a free register for vcall_offset. */
38725 if (vcall_offset)
38726 return false;
38728 /* Need a free register for GOT references. */
38729 if (flag_pic && !targetm.binds_local_p (function))
38730 return false;
38732 /* Otherwise ok. */
38733 return true;
38736 /* Output the assembler code for a thunk function. THUNK_DECL is the
38737 declaration for the thunk function itself, FUNCTION is the decl for
38738 the target function. DELTA is an immediate constant offset to be
38739 added to THIS. If VCALL_OFFSET is nonzero, the word at
38740 *(*this + vcall_offset) should be added to THIS. */
38742 static void
38743 x86_output_mi_thunk (FILE *file,
38744 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38745 HOST_WIDE_INT vcall_offset, tree function)
38747 rtx this_param = x86_this_parameter (function);
38748 rtx this_reg, tmp, fnaddr;
38749 unsigned int tmp_regno;
38751 if (TARGET_64BIT)
38752 tmp_regno = R10_REG;
38753 else
38755 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38756 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38757 tmp_regno = AX_REG;
38758 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38759 tmp_regno = DX_REG;
38760 else
38761 tmp_regno = CX_REG;
38764 emit_note (NOTE_INSN_PROLOGUE_END);
38766 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38767 pull it in now and let DELTA benefit. */
38768 if (REG_P (this_param))
38769 this_reg = this_param;
38770 else if (vcall_offset)
38772 /* Put the this parameter into %eax. */
38773 this_reg = gen_rtx_REG (Pmode, AX_REG);
38774 emit_move_insn (this_reg, this_param);
38776 else
38777 this_reg = NULL_RTX;
38779 /* Adjust the this parameter by a fixed constant. */
38780 if (delta)
38782 rtx delta_rtx = GEN_INT (delta);
38783 rtx delta_dst = this_reg ? this_reg : this_param;
38785 if (TARGET_64BIT)
38787 if (!x86_64_general_operand (delta_rtx, Pmode))
38789 tmp = gen_rtx_REG (Pmode, tmp_regno);
38790 emit_move_insn (tmp, delta_rtx);
38791 delta_rtx = tmp;
38795 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38798 /* Adjust the this parameter by a value stored in the vtable. */
38799 if (vcall_offset)
38801 rtx vcall_addr, vcall_mem, this_mem;
38803 tmp = gen_rtx_REG (Pmode, tmp_regno);
38805 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38806 if (Pmode != ptr_mode)
38807 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38808 emit_move_insn (tmp, this_mem);
38810 /* Adjust the this parameter. */
38811 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38812 if (TARGET_64BIT
38813 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38815 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38816 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38817 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38820 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38821 if (Pmode != ptr_mode)
38822 emit_insn (gen_addsi_1_zext (this_reg,
38823 gen_rtx_REG (ptr_mode,
38824 REGNO (this_reg)),
38825 vcall_mem));
38826 else
38827 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38830 /* If necessary, drop THIS back to its stack slot. */
38831 if (this_reg && this_reg != this_param)
38832 emit_move_insn (this_param, this_reg);
38834 fnaddr = XEXP (DECL_RTL (function), 0);
38835 if (TARGET_64BIT)
38837 if (!flag_pic || targetm.binds_local_p (function)
38838 || TARGET_PECOFF)
38840 else
38842 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38843 tmp = gen_rtx_CONST (Pmode, tmp);
38844 fnaddr = gen_rtx_MEM (Pmode, tmp);
38847 else
38849 if (!flag_pic || targetm.binds_local_p (function))
38851 #if TARGET_MACHO
38852 else if (TARGET_MACHO)
38854 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38855 fnaddr = XEXP (fnaddr, 0);
38857 #endif /* TARGET_MACHO */
38858 else
38860 tmp = gen_rtx_REG (Pmode, CX_REG);
38861 output_set_got (tmp, NULL_RTX);
38863 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38864 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
38865 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
38869 /* Our sibling call patterns do not allow memories, because we have no
38870 predicate that can distinguish between frame and non-frame memory.
38871 For our purposes here, we can get away with (ab)using a jump pattern,
38872 because we're going to do no optimization. */
38873 if (MEM_P (fnaddr))
38874 emit_jump_insn (gen_indirect_jump (fnaddr));
38875 else
38877 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38878 fnaddr = legitimize_pic_address (fnaddr,
38879 gen_rtx_REG (Pmode, tmp_regno));
38881 if (!sibcall_insn_operand (fnaddr, word_mode))
38883 tmp = gen_rtx_REG (word_mode, tmp_regno);
38884 if (GET_MODE (fnaddr) != word_mode)
38885 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38886 emit_move_insn (tmp, fnaddr);
38887 fnaddr = tmp;
38890 tmp = gen_rtx_MEM (QImode, fnaddr);
38891 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38892 tmp = emit_call_insn (tmp);
38893 SIBLING_CALL_P (tmp) = 1;
38895 emit_barrier ();
38897 /* Emit just enough of rest_of_compilation to get the insns emitted.
38898 Note that use_thunk calls assemble_start_function et al. */
38899 tmp = get_insns ();
38900 shorten_branches (tmp);
38901 final_start_function (tmp, file, 1);
38902 final (tmp, file, 1);
38903 final_end_function ();
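/* Emit target-specific directives at the start of the assembly file.  */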
38906 static void
38907 x86_file_start (void)
38909 default_file_start ();
38910 if (TARGET_16BIT)
38911 fputs ("\t.code16gcc\n", asm_out_file);
38912 #if TARGET_MACHO
38913 darwin_file_start ();
38914 #endif
38915 if (X86_FILE_START_VERSION_DIRECTIVE)
38916 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38917 if (X86_FILE_START_FLTUSED)
38918 fputs ("\t.global\t__fltused\n", asm_out_file);
38919 if (ix86_asm_dialect == ASM_INTEL)
38920 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
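/* Return the alignment (in bits) to use for FIELD whose natural
   alignment is COMPUTED.  On 32-bit targets without TARGET_ALIGN_DOUBLE,
   cap double, long long and similar fields at 32-bit alignment.  */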
38923 int
38924 x86_field_alignment (tree field, int computed)
38926 enum machine_mode mode;
38927 tree type = TREE_TYPE (field);
38929 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38930 return computed;
38931 mode = TYPE_MODE (strip_array_types (type));
38932 if (mode == DFmode || mode == DCmode
38933 || GET_MODE_CLASS (mode) == MODE_INT
38934 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38935 return MIN (32, computed);
38936 return computed;
38939 /* Output assembler code to FILE to increment profiler label # LABELNO
38940 for profiling a function entry. */
38941 void
38942 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38944 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38945 : MCOUNT_NAME);
38947 if (TARGET_64BIT)
38949 #ifndef NO_PROFILE_COUNTERS
38950 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38951 #endif
38953 if (!TARGET_PECOFF && flag_pic)
38954 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38955 else
38956 fprintf (file, "\tcall\t%s\n", mcount_name);
38958 else if (flag_pic)
38960 #ifndef NO_PROFILE_COUNTERS
38961 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38962 LPREFIX, labelno);
38963 #endif
38964 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38966 else
38968 #ifndef NO_PROFILE_COUNTERS
38969 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38970 LPREFIX, labelno);
38971 #endif
38972 fprintf (file, "\tcall\t%s\n", mcount_name);
38976 /* We don't have exact information about the insn sizes, but we may assume
38977 quite safely that we are informed about all 1 byte insns and memory
38978 address sizes. This is enough to eliminate unnecessary padding in
38979 99% of cases. */
38981 static int
38982 min_insn_size (rtx insn)
38984 int l = 0, len;
38986 if (!INSN_P (insn) || !active_insn_p (insn))
38987 return 0;
38989 /* Discard alignments we've emitted and jump instructions. */
38990 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38991 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38992 return 0;
38994 /* Important case - calls are always 5 bytes.
38995 It is common to have many calls in a row. */
38996 if (CALL_P (insn)
38997 && symbolic_reference_mentioned_p (PATTERN (insn))
38998 && !SIBLING_CALL_P (insn))
38999 return 5;
39000 len = get_attr_length (insn);
39001 if (len <= 1)
39002 return 1;
39004 /* For normal instructions we rely on get_attr_length being exact,
39005 with a few exceptions. */
39006 if (!JUMP_P (insn))
39008 enum attr_type type = get_attr_type (insn);
39010 switch (type)
39012 case TYPE_MULTI:
39013 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39014 || asm_noperands (PATTERN (insn)) >= 0)
39015 return 0;
39016 break;
39017 case TYPE_OTHER:
39018 case TYPE_FCMP:
39019 break;
39020 default:
39021 /* Otherwise trust get_attr_length. */
39022 return len;
39025 l = get_attr_length_address (insn);
39026 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39027 l = 4;
39029 if (l)
39030 return 1+l;
39031 else
39032 return 2;
39035 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39037 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39038 16 byte window. */
39040 static void
39041 ix86_avoid_jump_mispredicts (void)
39043 rtx insn, start = get_insns ();
39044 int nbytes = 0, njumps = 0;
39045 int isjump = 0;
39047 /* Look for all minimal intervals of instructions containing 4 jumps.
39048 The intervals are bounded by START and INSN. NBYTES is the total
39049 size of instructions in the interval including INSN and not including
39050 START. When the NBYTES is smaller than 16 bytes, it is possible
39051 that the end of START and INSN ends up in the same 16byte page.
39053 The smallest offset in the page INSN can start is the case where START
39054 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
39055 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
39057 Don't consider asm goto as a jump; while it can contain a jump, it doesn't
39058 have to, since control transfer to label(s) can be performed through other
39059 means, and we also estimate the minimum length of all asm stmts as 0. */
39060 for (insn = start; insn; insn = NEXT_INSN (insn))
39062 int min_size;
39064 if (LABEL_P (insn))
39066 int align = label_to_alignment (insn);
39067 int max_skip = label_to_max_skip (insn);
39069 if (max_skip > 15)
39070 max_skip = 15;
39071 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39072 already in the current 16 byte page, because otherwise
39073 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39074 bytes to reach 16 byte boundary. */
39075 if (align <= 0
39076 || (align <= 3 && max_skip != (1 << align) - 1))
39077 max_skip = 0;
39078 if (dump_file)
39079 fprintf (dump_file, "Label %i with max_skip %i\n",
39080 INSN_UID (insn), max_skip);
39081 if (max_skip)
39083 while (nbytes + max_skip >= 16)
39085 start = NEXT_INSN (start);
39086 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39087 || CALL_P (start))
39088 njumps--, isjump = 1;
39089 else
39090 isjump = 0;
39091 nbytes -= min_insn_size (start);
39094 continue;
39097 min_size = min_insn_size (insn);
39098 nbytes += min_size;
39099 if (dump_file)
39100 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39101 INSN_UID (insn), min_size);
39102 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39103 || CALL_P (insn))
39104 njumps++;
39105 else
39106 continue;
39108 while (njumps > 3)
39110 start = NEXT_INSN (start);
39111 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39112 || CALL_P (start))
39113 njumps--, isjump = 1;
39114 else
39115 isjump = 0;
39116 nbytes -= min_insn_size (start);
39118 gcc_assert (njumps >= 0);
39119 if (dump_file)
39120 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39121 INSN_UID (start), INSN_UID (insn), nbytes);
39123 if (njumps == 3 && isjump && nbytes < 16)
39125 int padsize = 15 - nbytes + min_insn_size (insn);
39127 if (dump_file)
39128 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39129 INSN_UID (insn), padsize);
39130 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39134 #endif
39136 /* AMD Athlon works faster
39137 when RET is not the destination of a conditional jump or directly preceded
39138 by another jump instruction. We avoid the penalty by inserting a NOP just
39139 before the RET instructions in such cases. */
39140 static void
39141 ix86_pad_returns (void)
39143 edge e;
39144 edge_iterator ei;
39146 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39148 basic_block bb = e->src;
39149 rtx ret = BB_END (bb);
39150 rtx prev;
39151 bool replace = false;
39153 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39154 || optimize_bb_for_size_p (bb))
39155 continue;
39156 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39157 if (active_insn_p (prev) || LABEL_P (prev))
39158 break;
39159 if (prev && LABEL_P (prev))
39161 edge e;
39162 edge_iterator ei;
39164 FOR_EACH_EDGE (e, ei, bb->preds)
39165 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39166 && !(e->flags & EDGE_FALLTHRU))
39168 replace = true;
39169 break;
39172 if (!replace)
39174 prev = prev_active_insn (ret);
39175 if (prev
39176 && ((JUMP_P (prev) && any_condjump_p (prev))
39177 || CALL_P (prev)))
39178 replace = true;
39179 /* Empty functions get a branch mispredict even when
39180 the jump destination is not visible to us. */
39181 if (!prev && !optimize_function_for_size_p (cfun))
39182 replace = true;
39184 if (replace)
39186 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39187 delete_insn (ret);
39192 /* Count the minimum number of instructions in BB. Return 4 if the
39193 number of instructions >= 4. */
39195 static int
39196 ix86_count_insn_bb (basic_block bb)
39198 rtx insn;
39199 int insn_count = 0;
39201 /* Count number of instructions in this block. Return 4 if the number
39202 of instructions >= 4. */
39203 FOR_BB_INSNS (bb, insn)
39205 /* Return jumps only happen in exit blocks. */
39206 if (JUMP_P (insn)
39207 && ANY_RETURN_P (PATTERN (insn)))
39208 break;
39210 if (NONDEBUG_INSN_P (insn)
39211 && GET_CODE (PATTERN (insn)) != USE
39212 && GET_CODE (PATTERN (insn)) != CLOBBER)
39214 insn_count++;
39215 if (insn_count >= 4)
39216 return insn_count;
39220 return insn_count;
39224 /* Count the minimum number of instructions in code path in BB.
39225 Return 4 if the number of instructions >= 4. */
39227 static int
39228 ix86_count_insn (basic_block bb)
39230 edge e;
39231 edge_iterator ei;
39232 int min_prev_count;
39234 /* Only bother counting instructions along paths with no
39235 more than 2 basic blocks between entry and exit. Given
39236 that BB has an edge to exit, determine if a predecessor
39237 of BB has an edge from entry. If so, compute the number
39238 of instructions in the predecessor block. If there
39239 happen to be multiple such blocks, compute the minimum. */
39240 min_prev_count = 4;
39241 FOR_EACH_EDGE (e, ei, bb->preds)
39243 edge prev_e;
39244 edge_iterator prev_ei;
39246 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39248 min_prev_count = 0;
39249 break;
39251 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39253 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39255 int count = ix86_count_insn_bb (e->src);
39256 if (count < min_prev_count)
39257 min_prev_count = count;
39258 break;
39263 if (min_prev_count < 4)
39264 min_prev_count += ix86_count_insn_bb (bb);
39266 return min_prev_count;
39269 /* Pad short function to 4 instructions. */
39271 static void
39272 ix86_pad_short_function (void)
39274 edge e;
39275 edge_iterator ei;
39277 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39279 rtx ret = BB_END (e->src);
39280 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39282 int insn_count = ix86_count_insn (e->src);
39284 /* Pad short function. */
39285 if (insn_count < 4)
39287 rtx insn = ret;
39289 /* Find epilogue. */
39290 while (insn
39291 && (!NOTE_P (insn)
39292 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39293 insn = PREV_INSN (insn);
39295 if (!insn)
39296 insn = ret;
39298 /* Two NOPs count as one instruction. */
39299 insn_count = 2 * (4 - insn_count);
39300 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39306 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39307 the epilogue, the Windows system unwinder will apply epilogue logic and
39308 produce incorrect offsets. This can be avoided by adding a nop between
39309 the last insn that can throw and the first insn of the epilogue. */
39311 static void
39312 ix86_seh_fixup_eh_fallthru (void)
39314 edge e;
39315 edge_iterator ei;
39317 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39319 rtx insn, next;
39321 /* Find the beginning of the epilogue. */
39322 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39323 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39324 break;
39325 if (insn == NULL)
39326 continue;
39328 /* We only care about preceding insns that can throw. */
39329 insn = prev_active_insn (insn);
39330 if (insn == NULL || !can_throw_internal (insn))
39331 continue;
39333 /* Do not separate calls from their debug information. */
39334 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39335 if (NOTE_P (next)
39336 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39337 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39338 insn = next;
39339 else
39340 break;
39342 emit_insn_after (gen_nops (const1_rtx), insn);
39346 /* Implement machine specific optimizations. We implement padding of returns
39347 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39348 static void
39349 ix86_reorg (void)
39351 /* We are freeing block_for_insn in the toplev to keep compatibility
39352 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39353 compute_bb_for_insn ();
39355 if (TARGET_SEH && current_function_has_exception_handlers ())
39356 ix86_seh_fixup_eh_fallthru ();
39358 if (optimize && optimize_function_for_speed_p (cfun))
39360 if (TARGET_PAD_SHORT_FUNCTION)
39361 ix86_pad_short_function ();
39362 else if (TARGET_PAD_RETURNS)
39363 ix86_pad_returns ();
39364 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39365 if (TARGET_FOUR_JUMP_LIMIT)
39366 ix86_avoid_jump_mispredicts ();
39367 #endif
39371 /* Return nonzero when a QImode register that must be represented via a REX
39372 prefix is used. */
39373 bool
39374 x86_extended_QIreg_mentioned_p (rtx insn)
39376 int i;
39377 extract_insn_cached (insn);
39378 for (i = 0; i < recog_data.n_operands; i++)
39379 if (GENERAL_REG_P (recog_data.operand[i])
39380 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39381 return true;
39382 return false;
39385 /* Return nonzero when P points to a register encoded via a REX prefix.
39386 Called via for_each_rtx. */
39387 static int
39388 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39390 unsigned int regno;
39391 if (!REG_P (*p))
39392 return 0;
39393 regno = REGNO (*p);
39394 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39397 /* Return true when INSN mentions a register that must be encoded using a
39398 REX prefix. */
39399 bool
39400 x86_extended_reg_mentioned_p (rtx insn)
39402 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39403 extended_reg_mentioned_1, NULL);
39406 /* If profitable, negate (without causing overflow) integer constant
39407 of mode MODE at location LOC. Return true in this case. */
39408 bool
39409 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39411 HOST_WIDE_INT val;
39413 if (!CONST_INT_P (*loc))
39414 return false;
39416 switch (mode)
39418 case DImode:
39419 /* DImode x86_64 constants must fit in 32 bits. */
39420 gcc_assert (x86_64_immediate_operand (*loc, mode));
39422 mode = SImode;
39423 break;
39425 case SImode:
39426 case HImode:
39427 case QImode:
39428 break;
39430 default:
39431 gcc_unreachable ();
39434 /* Avoid overflows. */
39435 if (mode_signbit_p (mode, *loc))
39436 return false;
39438 val = INTVAL (*loc);
39440 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39441 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39442 if ((val < 0 && val != -128)
39443 || val == 128)
39445 *loc = GEN_INT (-val);
39446 return true;
39449 return false;
39452 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39453 optabs would emit if we didn't have TFmode patterns. */
39455 void
39456 x86_emit_floatuns (rtx operands[2])
39458 rtx neglab, donelab, i0, i1, f0, in, out;
39459 enum machine_mode mode, inmode;
39461 inmode = GET_MODE (operands[1]);
39462 gcc_assert (inmode == SImode || inmode == DImode);
39464 out = operands[0];
39465 in = force_reg (inmode, operands[1]);
39466 mode = GET_MODE (out);
39467 neglab = gen_label_rtx ();
39468 donelab = gen_label_rtx ();
39469 f0 = gen_reg_rtx (mode);
39471 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39473 expand_float (out, in, 0);
39475 emit_jump_insn (gen_jump (donelab));
39476 emit_barrier ();
39478 emit_label (neglab);
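/* The input has its sign bit set, so it is too large for a signed
   conversion.  Halve it, OR the shifted-out low bit back in so the
   final rounding is still correct, convert, and then double the
   result.  */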
39480 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39481 1, OPTAB_DIRECT);
39482 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39483 1, OPTAB_DIRECT);
39484 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39486 expand_float (f0, i0, 0);
39488 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39490 emit_label (donelab);
39493 /* AVX512F does support 64-byte integer vector operations,
39494 thus the longest vector we are faced with is V64QImode. */
39495 #define MAX_VECT_LEN 64
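/* Describes a vector permutation to be expanded: the target and source
   operands, the requested element permutation, the vector mode and
   element count, and whether we are only testing expandability.  */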
39497 struct expand_vec_perm_d
39499 rtx target, op0, op1;
39500 unsigned char perm[MAX_VECT_LEN];
39501 enum machine_mode vmode;
39502 unsigned char nelt;
39503 bool one_operand_p;
39504 bool testing_p;
39507 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39508 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39509 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39511 /* Get a vector mode of the same size as the original but with elements
39512 twice as wide. This is only guaranteed to apply to integral vectors. */
39514 static inline enum machine_mode
39515 get_mode_wider_vector (enum machine_mode o)
39517 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39518 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39519 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39520 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39521 return n;
39524 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39525 fill target with val via vec_duplicate. */
39527 static bool
39528 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39530 bool ok;
39531 rtx insn, dup;
39533 /* First attempt to recognize VAL as-is. */
39534 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39535 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39536 if (recog_memoized (insn) < 0)
39538 rtx seq;
39539 /* If that fails, force VAL into a register. */
39541 start_sequence ();
39542 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39543 seq = get_insns ();
39544 end_sequence ();
39545 if (seq)
39546 emit_insn_before (seq, insn);
39548 ok = recog_memoized (insn) >= 0;
39549 gcc_assert (ok);
39551 return true;
39554 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39555 with all elements equal to VAR. Return true if successful. */
39557 static bool
39558 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39559 rtx target, rtx val)
39561 bool ok;
39563 switch (mode)
39565 case V2SImode:
39566 case V2SFmode:
39567 if (!mmx_ok)
39568 return false;
39569 /* FALLTHRU */
39571 case V4DFmode:
39572 case V4DImode:
39573 case V8SFmode:
39574 case V8SImode:
39575 case V2DFmode:
39576 case V2DImode:
39577 case V4SFmode:
39578 case V4SImode:
39579 case V16SImode:
39580 case V8DImode:
39581 case V16SFmode:
39582 case V8DFmode:
39583 return ix86_vector_duplicate_value (mode, target, val);
39585 case V4HImode:
39586 if (!mmx_ok)
39587 return false;
39588 if (TARGET_SSE || TARGET_3DNOW_A)
39590 rtx x;
39592 val = gen_lowpart (SImode, val);
39593 x = gen_rtx_TRUNCATE (HImode, val);
39594 x = gen_rtx_VEC_DUPLICATE (mode, x);
39595 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39596 return true;
39598 goto widen;
39600 case V8QImode:
39601 if (!mmx_ok)
39602 return false;
39603 goto widen;
39605 case V8HImode:
39606 if (TARGET_SSE2)
39608 struct expand_vec_perm_d dperm;
39609 rtx tmp1, tmp2;
39611 permute:
39612 memset (&dperm, 0, sizeof (dperm));
39613 dperm.target = target;
39614 dperm.vmode = mode;
39615 dperm.nelt = GET_MODE_NUNITS (mode);
39616 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39617 dperm.one_operand_p = true;
39619 /* Extend to SImode using a paradoxical SUBREG. */
39620 tmp1 = gen_reg_rtx (SImode);
39621 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39623 /* Insert the SImode value as low element of a V4SImode vector. */
39624 tmp2 = gen_reg_rtx (V4SImode);
39625 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39626 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39628 ok = (expand_vec_perm_1 (&dperm)
39629 || expand_vec_perm_broadcast_1 (&dperm));
39630 gcc_assert (ok);
39631 return ok;
39633 goto widen;
39635 case V16QImode:
39636 if (TARGET_SSE2)
39637 goto permute;
39638 goto widen;
39640 widen:
39641 /* Replicate the value once into the next wider mode and recurse. */
39643 enum machine_mode smode, wsmode, wvmode;
39644 rtx x;
39646 smode = GET_MODE_INNER (mode);
39647 wvmode = get_mode_wider_vector (mode);
39648 wsmode = GET_MODE_INNER (wvmode);
39650 val = convert_modes (wsmode, smode, val, true);
39651 x = expand_simple_binop (wsmode, ASHIFT, val,
39652 GEN_INT (GET_MODE_BITSIZE (smode)),
39653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39654 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39656 x = gen_reg_rtx (wvmode);
39657 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39658 gcc_assert (ok);
39659 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39660 return ok;
39663 case V16HImode:
39664 case V32QImode:
39666 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39667 rtx x = gen_reg_rtx (hvmode);
39669 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39670 gcc_assert (ok);
39672 x = gen_rtx_VEC_CONCAT (mode, x, x);
39673 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39675 return true;
39677 default:
39678 return false;
39682 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39683 whose ONE_VAR element is VAR, and other elements are zero. Return true
39684 if successful. */
39686 static bool
39687 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39688 rtx target, rtx var, int one_var)
39690 enum machine_mode vsimode;
39691 rtx new_target;
39692 rtx x, tmp;
39693 bool use_vector_set = false;
39695 switch (mode)
39697 case V2DImode:
39698 /* For SSE4.1, we normally use vector set. But if the second
39699 element is zero and inter-unit moves are OK, we use movq
39700 instead. */
39701 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39702 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39703 && one_var == 0));
39704 break;
39705 case V16QImode:
39706 case V4SImode:
39707 case V4SFmode:
39708 use_vector_set = TARGET_SSE4_1;
39709 break;
39710 case V8HImode:
39711 use_vector_set = TARGET_SSE2;
39712 break;
39713 case V4HImode:
39714 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39715 break;
39716 case V32QImode:
39717 case V16HImode:
39718 case V8SImode:
39719 case V8SFmode:
39720 case V4DFmode:
39721 use_vector_set = TARGET_AVX;
39722 break;
39723 case V4DImode:
39724 /* Use ix86_expand_vector_set in 64bit mode only. */
39725 use_vector_set = TARGET_AVX && TARGET_64BIT;
39726 break;
39727 default:
39728 break;
39731 if (use_vector_set)
39733 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39734 var = force_reg (GET_MODE_INNER (mode), var);
39735 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39736 return true;
39739 switch (mode)
39741 case V2SFmode:
39742 case V2SImode:
39743 if (!mmx_ok)
39744 return false;
39745 /* FALLTHRU */
39747 case V2DFmode:
39748 case V2DImode:
39749 if (one_var != 0)
39750 return false;
39751 var = force_reg (GET_MODE_INNER (mode), var);
39752 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39753 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39754 return true;
39756 case V4SFmode:
39757 case V4SImode:
39758 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39759 new_target = gen_reg_rtx (mode);
39760 else
39761 new_target = target;
39762 var = force_reg (GET_MODE_INNER (mode), var);
39763 x = gen_rtx_VEC_DUPLICATE (mode, var);
39764 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39765 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39766 if (one_var != 0)
39768 /* We need to shuffle the value to the correct position, so
39769 create a new pseudo to store the intermediate result. */
39771 /* With SSE2, we can use the integer shuffle insns. */
39772 if (mode != V4SFmode && TARGET_SSE2)
39774 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39775 const1_rtx,
39776 GEN_INT (one_var == 1 ? 0 : 1),
39777 GEN_INT (one_var == 2 ? 0 : 1),
39778 GEN_INT (one_var == 3 ? 0 : 1)));
39779 if (target != new_target)
39780 emit_move_insn (target, new_target);
39781 return true;
39784 /* Otherwise convert the intermediate result to V4SFmode and
39785 use the SSE1 shuffle instructions. */
39786 if (mode != V4SFmode)
39788 tmp = gen_reg_rtx (V4SFmode);
39789 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39791 else
39792 tmp = new_target;
39794 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39795 const1_rtx,
39796 GEN_INT (one_var == 1 ? 0 : 1),
39797 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39798 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39800 if (mode != V4SFmode)
39801 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39802 else if (tmp != target)
39803 emit_move_insn (target, tmp);
39805 else if (target != new_target)
39806 emit_move_insn (target, new_target);
39807 return true;
39809 case V8HImode:
39810 case V16QImode:
39811 vsimode = V4SImode;
39812 goto widen;
39813 case V4HImode:
39814 case V8QImode:
39815 if (!mmx_ok)
39816 return false;
39817 vsimode = V2SImode;
39818 goto widen;
39819 widen:
39820 if (one_var != 0)
39821 return false;
39823 /* Zero extend the variable element to SImode and recurse. */
39824 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39826 x = gen_reg_rtx (vsimode);
39827 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39828 var, one_var))
39829 gcc_unreachable ();
39831 emit_move_insn (target, gen_lowpart (mode, x));
39832 return true;
39834 default:
39835 return false;
39839 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39840 consisting of the values in VALS. It is known that all elements
39841 except ONE_VAR are constants. Return true if successful. */
39843 static bool
39844 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39845 rtx target, rtx vals, int one_var)
39847 rtx var = XVECEXP (vals, 0, one_var);
39848 enum machine_mode wmode;
39849 rtx const_vec, x;
39851 const_vec = copy_rtx (vals);
39852 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39853 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39855 switch (mode)
39857 case V2DFmode:
39858 case V2DImode:
39859 case V2SFmode:
39860 case V2SImode:
39861 /* For the two element vectors, it's just as easy to use
39862 the general case. */
39863 return false;
39865 case V4DImode:
39866 /* Use ix86_expand_vector_set in 64bit mode only. */
39867 if (!TARGET_64BIT)
39868 return false;
39869 case V4DFmode:
39870 case V8SFmode:
39871 case V8SImode:
39872 case V16HImode:
39873 case V32QImode:
39874 case V4SFmode:
39875 case V4SImode:
39876 case V8HImode:
39877 case V4HImode:
39878 break;
39880 case V16QImode:
39881 if (TARGET_SSE4_1)
39882 break;
39883 wmode = V8HImode;
39884 goto widen;
39885 case V8QImode:
39886 wmode = V4HImode;
39887 goto widen;
39888 widen:
39889 /* There's no way to set one QImode entry easily. Combine
39890 the variable value with its adjacent constant value, and
39891 promote to an HImode set. */
39892 x = XVECEXP (vals, 0, one_var ^ 1);
39893 if (one_var & 1)
39895 var = convert_modes (HImode, QImode, var, true);
39896 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39897 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39898 x = GEN_INT (INTVAL (x) & 0xff);
39900 else
39902 var = convert_modes (HImode, QImode, var, true);
39903 x = gen_int_mode (INTVAL (x) << 8, HImode);
39905 if (x != const0_rtx)
39906 var = expand_simple_binop (HImode, IOR, var, x, var,
39907 1, OPTAB_LIB_WIDEN);
39909 x = gen_reg_rtx (wmode);
39910 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39911 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39913 emit_move_insn (target, gen_lowpart (mode, x));
39914 return true;
39916 default:
39917 return false;
39920 emit_move_insn (target, const_vec);
39921 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39922 return true;
39925 /* A subroutine of ix86_expand_vector_init_general. Use vector
39926 concatenate to handle the most general case: all values variable,
39927 and none identical. */
39929 static void
39930 ix86_expand_vector_init_concat (enum machine_mode mode,
39931 rtx target, rtx *ops, int n)
39933 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39934 rtx first[16], second[8], third[4];
39935 rtvec v;
39936 int i, j;
39938 switch (n)
39940 case 2:
39941 switch (mode)
39943 case V16SImode:
39944 cmode = V8SImode;
39945 break;
39946 case V16SFmode:
39947 cmode = V8SFmode;
39948 break;
39949 case V8DImode:
39950 cmode = V4DImode;
39951 break;
39952 case V8DFmode:
39953 cmode = V4DFmode;
39954 break;
39955 case V8SImode:
39956 cmode = V4SImode;
39957 break;
39958 case V8SFmode:
39959 cmode = V4SFmode;
39960 break;
39961 case V4DImode:
39962 cmode = V2DImode;
39963 break;
39964 case V4DFmode:
39965 cmode = V2DFmode;
39966 break;
39967 case V4SImode:
39968 cmode = V2SImode;
39969 break;
39970 case V4SFmode:
39971 cmode = V2SFmode;
39972 break;
39973 case V2DImode:
39974 cmode = DImode;
39975 break;
39976 case V2SImode:
39977 cmode = SImode;
39978 break;
39979 case V2DFmode:
39980 cmode = DFmode;
39981 break;
39982 case V2SFmode:
39983 cmode = SFmode;
39984 break;
39985 default:
39986 gcc_unreachable ();
39989 if (!register_operand (ops[1], cmode))
39990 ops[1] = force_reg (cmode, ops[1]);
39991 if (!register_operand (ops[0], cmode))
39992 ops[0] = force_reg (cmode, ops[0]);
39993 emit_insn (gen_rtx_SET (VOIDmode, target,
39994 gen_rtx_VEC_CONCAT (mode, ops[0],
39995 ops[1])));
39996 break;
39998 case 4:
39999 switch (mode)
40001 case V4DImode:
40002 cmode = V2DImode;
40003 break;
40004 case V4DFmode:
40005 cmode = V2DFmode;
40006 break;
40007 case V4SImode:
40008 cmode = V2SImode;
40009 break;
40010 case V4SFmode:
40011 cmode = V2SFmode;
40012 break;
40013 default:
40014 gcc_unreachable ();
40016 goto half;
40018 case 8:
40019 switch (mode)
40021 case V8DImode:
40022 cmode = V2DImode;
40023 hmode = V4DImode;
40024 break;
40025 case V8DFmode:
40026 cmode = V2DFmode;
40027 hmode = V4DFmode;
40028 break;
40029 case V8SImode:
40030 cmode = V2SImode;
40031 hmode = V4SImode;
40032 break;
40033 case V8SFmode:
40034 cmode = V2SFmode;
40035 hmode = V4SFmode;
40036 break;
40037 default:
40038 gcc_unreachable ();
40040 goto half;
40042 case 16:
40043 switch (mode)
40045 case V16SImode:
40046 cmode = V2SImode;
40047 hmode = V4SImode;
40048 gmode = V8SImode;
40049 break;
40050 case V16SFmode:
40051 cmode = V2SFmode;
40052 hmode = V4SFmode;
40053 gmode = V8SFmode;
40054 break;
40055 default:
40056 gcc_unreachable ();
40058 goto half;
40060 half:
40061 /* FIXME: We process inputs backward to help RA. PR 36222. */
40062 i = n - 1;
40063 j = (n >> 1) - 1;
40064 for (; i > 0; i -= 2, j--)
40066 first[j] = gen_reg_rtx (cmode);
40067 v = gen_rtvec (2, ops[i - 1], ops[i]);
40068 ix86_expand_vector_init (false, first[j],
40069 gen_rtx_PARALLEL (cmode, v));
40072 n >>= 1;
40073 if (n > 4)
40075 gcc_assert (hmode != VOIDmode);
40076 gcc_assert (gmode != VOIDmode);
40077 for (i = j = 0; i < n; i += 2, j++)
40079 second[j] = gen_reg_rtx (hmode);
40080 ix86_expand_vector_init_concat (hmode, second [j],
40081 &first [i], 2);
40083 n >>= 1;
40084 for (i = j = 0; i < n; i += 2, j++)
40086 third[j] = gen_reg_rtx (gmode);
40087 ix86_expand_vector_init_concat (gmode, third[j],
40088 &second[i], 2);
40090 n >>= 1;
40091 ix86_expand_vector_init_concat (mode, target, third, n);
40093 else if (n > 2)
40095 gcc_assert (hmode != VOIDmode);
40096 for (i = j = 0; i < n; i += 2, j++)
40098 second[j] = gen_reg_rtx (hmode);
40099 ix86_expand_vector_init_concat (hmode, second [j],
40100 &first [i], 2);
40102 n >>= 1;
40103 ix86_expand_vector_init_concat (mode, target, second, n);
40105 else
40106 ix86_expand_vector_init_concat (mode, target, first, n);
40107 break;
40109 default:
40110 gcc_unreachable ();
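/* Illustrative plain-C model of the n == 4 case above (helper name
   invented): adjacent scalars are paired into half-width vectors
   first, and the two halves are then concatenated, so lane order is
   preserved.  */
static void
concat4_model (const int ops[4], int out[4])
{
  int lo[2], hi[2];
  /* Pairs are built from the last one backward, as in the loop above.  */
  hi[0] = ops[2]; hi[1] = ops[3];
  lo[0] = ops[0]; lo[1] = ops[1];
  /* The final VEC_CONCAT (mode, lo, hi).  */
  out[0] = lo[0]; out[1] = lo[1];
  out[2] = hi[0]; out[3] = hi[1];
}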
40114 /* A subroutine of ix86_expand_vector_init_general. Use vector
40115 interleave to handle the most general case: all values variable,
40116 and none identical. */
40118 static void
40119 ix86_expand_vector_init_interleave (enum machine_mode mode,
40120 rtx target, rtx *ops, int n)
40122 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40123 int i, j;
40124 rtx op0, op1;
40125 rtx (*gen_load_even) (rtx, rtx, rtx);
40126 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40127 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40129 switch (mode)
40131 case V8HImode:
40132 gen_load_even = gen_vec_setv8hi;
40133 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40134 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40135 inner_mode = HImode;
40136 first_imode = V4SImode;
40137 second_imode = V2DImode;
40138 third_imode = VOIDmode;
40139 break;
40140 case V16QImode:
40141 gen_load_even = gen_vec_setv16qi;
40142 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40143 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40144 inner_mode = QImode;
40145 first_imode = V8HImode;
40146 second_imode = V4SImode;
40147 third_imode = V2DImode;
40148 break;
40149 default:
40150 gcc_unreachable ();
40153 for (i = 0; i < n; i++)
40155 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40156 op0 = gen_reg_rtx (SImode);
40157 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40159 /* Insert the SImode value as low element of V4SImode vector. */
40160 op1 = gen_reg_rtx (V4SImode);
40161 op0 = gen_rtx_VEC_MERGE (V4SImode,
40162 gen_rtx_VEC_DUPLICATE (V4SImode,
40163 op0),
40164 CONST0_RTX (V4SImode),
40165 const1_rtx);
40166 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40168 /* Cast the V4SImode vector back to a vector in the original mode. */
40169 op0 = gen_reg_rtx (mode);
40170 emit_move_insn (op0, gen_lowpart (mode, op1));
40172 /* Load even elements into the second position. */
40173 emit_insn (gen_load_even (op0,
40174 force_reg (inner_mode,
40175 ops [i + i + 1]),
40176 const1_rtx));
40178 /* Cast vector to FIRST_IMODE vector. */
40179 ops[i] = gen_reg_rtx (first_imode);
40180 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40183 /* Interleave low FIRST_IMODE vectors. */
40184 for (i = j = 0; i < n; i += 2, j++)
40186 op0 = gen_reg_rtx (first_imode);
40187 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40189 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40190 ops[j] = gen_reg_rtx (second_imode);
40191 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40194 /* Interleave low SECOND_IMODE vectors. */
40195 switch (second_imode)
40197 case V4SImode:
40198 for (i = j = 0; i < n / 2; i += 2, j++)
40200 op0 = gen_reg_rtx (second_imode);
40201 emit_insn (gen_interleave_second_low (op0, ops[i],
40202 ops[i + 1]));
40204 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40205 vector. */
40206 ops[j] = gen_reg_rtx (third_imode);
40207 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40209 second_imode = V2DImode;
40210 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40211 /* FALLTHRU */
40213 case V2DImode:
40214 op0 = gen_reg_rtx (second_imode);
40215 emit_insn (gen_interleave_second_low (op0, ops[0],
40216 ops[1]));
40218 /* Cast the SECOND_IMODE vector back to a vector in the original
40219 mode. */
40220 emit_insn (gen_rtx_SET (VOIDmode, target,
40221 gen_lowpart (mode, op0)));
40222 break;
40224 default:
40225 gcc_unreachable ();
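/* Illustrative model of one interleave-low step used above (helper name
   invented), with punpckldq-style semantics at 32-bit granularity:
   lanes of the two inputs alternate, so two vectors that each hold an
   element pair in lane 0 end up with both pairs adjacent.  */
static void
interleave_low_model (const unsigned int a[4], const unsigned int b[4],
		      unsigned int out[4])
{
  out[0] = a[0];	/* pair loaded into lane 0 of the first input  */
  out[1] = b[0];	/* pair loaded into lane 0 of the second input */
  out[2] = a[1];
  out[3] = b[1];
}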
40229 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40230 all values variable, and none identical. */
40232 static void
40233 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40234 rtx target, rtx vals)
40236 rtx ops[64], op0, op1;
40237 enum machine_mode half_mode = VOIDmode;
40238 int n, i;
40240 switch (mode)
40242 case V2SFmode:
40243 case V2SImode:
40244 if (!mmx_ok && !TARGET_SSE)
40245 break;
40246 /* FALLTHRU */
40248 case V16SImode:
40249 case V16SFmode:
40250 case V8DFmode:
40251 case V8DImode:
40252 case V8SFmode:
40253 case V8SImode:
40254 case V4DFmode:
40255 case V4DImode:
40256 case V4SFmode:
40257 case V4SImode:
40258 case V2DFmode:
40259 case V2DImode:
40260 n = GET_MODE_NUNITS (mode);
40261 for (i = 0; i < n; i++)
40262 ops[i] = XVECEXP (vals, 0, i);
40263 ix86_expand_vector_init_concat (mode, target, ops, n);
40264 return;
40266 case V32QImode:
40267 half_mode = V16QImode;
40268 goto half;
40270 case V16HImode:
40271 half_mode = V8HImode;
40272 goto half;
40274 half:
40275 n = GET_MODE_NUNITS (mode);
40276 for (i = 0; i < n; i++)
40277 ops[i] = XVECEXP (vals, 0, i);
40278 op0 = gen_reg_rtx (half_mode);
40279 op1 = gen_reg_rtx (half_mode);
40280 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40281 n >> 2);
40282 ix86_expand_vector_init_interleave (half_mode, op1,
40283 &ops [n >> 1], n >> 2);
40284 emit_insn (gen_rtx_SET (VOIDmode, target,
40285 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40286 return;
40288 case V16QImode:
40289 if (!TARGET_SSE4_1)
40290 break;
40291 /* FALLTHRU */
40293 case V8HImode:
40294 if (!TARGET_SSE2)
40295 break;
40297 /* Don't use ix86_expand_vector_init_interleave if we can't
40298 move from GPR to SSE register directly. */
40299 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40300 break;
40302 n = GET_MODE_NUNITS (mode);
40303 for (i = 0; i < n; i++)
40304 ops[i] = XVECEXP (vals, 0, i);
40305 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40306 return;
40308 case V4HImode:
40309 case V8QImode:
40310 break;
40312 default:
40313 gcc_unreachable ();
40317 int i, j, n_elts, n_words, n_elt_per_word;
40318 enum machine_mode inner_mode;
40319 rtx words[4], shift;
40321 inner_mode = GET_MODE_INNER (mode);
40322 n_elts = GET_MODE_NUNITS (mode);
40323 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40324 n_elt_per_word = n_elts / n_words;
40325 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40327 for (i = 0; i < n_words; ++i)
40329 rtx word = NULL_RTX;
40331 for (j = 0; j < n_elt_per_word; ++j)
40333 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40334 elt = convert_modes (word_mode, inner_mode, elt, true);
40336 if (j == 0)
40337 word = elt;
40338 else
40340 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40341 word, 1, OPTAB_LIB_WIDEN);
40342 word = expand_simple_binop (word_mode, IOR, word, elt,
40343 word, 1, OPTAB_LIB_WIDEN);
40347 words[i] = word;
40350 if (n_words == 1)
40351 emit_move_insn (target, gen_lowpart (mode, words[0]));
40352 else if (n_words == 2)
40354 rtx tmp = gen_reg_rtx (mode);
40355 emit_clobber (tmp);
40356 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40357 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40358 emit_move_insn (target, tmp);
40360 else if (n_words == 4)
40362 rtx tmp = gen_reg_rtx (V4SImode);
40363 gcc_assert (word_mode == SImode);
40364 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40365 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40366 emit_move_insn (target, gen_lowpart (mode, tmp));
40368 else
40369 gcc_unreachable ();
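/* Illustrative model of the shift/IOR packing loop above for V4HImode
   with a 32-bit word (helper name invented): the elements of one word
   are combined starting from the highest-indexed one, so the
   lowest-indexed element ends up in the low bits of the word.  */
static unsigned int
pack_word_model (unsigned short lo_elt, unsigned short hi_elt)
{
  unsigned int word = hi_elt;		/* j == 0: last element of the word   */
  word = (word << 16) | lo_elt;		/* ASHIFT by the inner size, then IOR */
  return word;
}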
40373 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40374 instructions unless MMX_OK is true. */
40376 void
40377 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40379 enum machine_mode mode = GET_MODE (target);
40380 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40381 int n_elts = GET_MODE_NUNITS (mode);
40382 int n_var = 0, one_var = -1;
40383 bool all_same = true, all_const_zero = true;
40384 int i;
40385 rtx x;
40387 for (i = 0; i < n_elts; ++i)
40389 x = XVECEXP (vals, 0, i);
40390 if (!(CONST_INT_P (x)
40391 || GET_CODE (x) == CONST_DOUBLE
40392 || GET_CODE (x) == CONST_FIXED))
40393 n_var++, one_var = i;
40394 else if (x != CONST0_RTX (inner_mode))
40395 all_const_zero = false;
40396 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40397 all_same = false;
40400 /* Constants are best loaded from the constant pool. */
40401 if (n_var == 0)
40403 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40404 return;
40407 /* If all values are identical, broadcast the value. */
40408 if (all_same
40409 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40410 XVECEXP (vals, 0, 0)))
40411 return;
40413 /* Values where only one field is non-constant are best loaded from
40414 the pool and overwritten via move later. */
40415 if (n_var == 1)
40417 if (all_const_zero
40418 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40419 XVECEXP (vals, 0, one_var),
40420 one_var))
40421 return;
40423 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40424 return;
40427 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40430 void
40431 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40433 enum machine_mode mode = GET_MODE (target);
40434 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40435 enum machine_mode half_mode;
40436 bool use_vec_merge = false;
40437 rtx tmp;
40438 static rtx (*gen_extract[6][2]) (rtx, rtx)
40440 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40441 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40442 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40443 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40444 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40445 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40447 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40449 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40450 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40451 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40452 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40453 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40454 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40456 int i, j, n;
40458 switch (mode)
40460 case V2SFmode:
40461 case V2SImode:
40462 if (mmx_ok)
40464 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40465 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40466 if (elt == 0)
40467 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40468 else
40469 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40470 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40471 return;
40473 break;
40475 case V2DImode:
40476 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40477 if (use_vec_merge)
40478 break;
40480 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40481 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40482 if (elt == 0)
40483 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40484 else
40485 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40486 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40487 return;
40489 case V2DFmode:
40491 rtx op0, op1;
40493 /* For the two-element vectors, we implement a VEC_CONCAT with
40494 the extraction of the other element. */
40496 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40497 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40499 if (elt == 0)
40500 op0 = val, op1 = tmp;
40501 else
40502 op0 = tmp, op1 = val;
40504 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40505 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40507 return;
40509 case V4SFmode:
40510 use_vec_merge = TARGET_SSE4_1;
40511 if (use_vec_merge)
40512 break;
40514 switch (elt)
40516 case 0:
40517 use_vec_merge = true;
40518 break;
40520 case 1:
40521 /* tmp = target = A B C D */
40522 tmp = copy_to_reg (target);
40523 /* target = A A B B */
40524 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40525 /* target = X A B B */
40526 ix86_expand_vector_set (false, target, val, 0);
40527 /* target = A X C D */
40528 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40529 const1_rtx, const0_rtx,
40530 GEN_INT (2+4), GEN_INT (3+4)));
40531 return;
40533 case 2:
40534 /* tmp = target = A B C D */
40535 tmp = copy_to_reg (target);
40536 /* tmp = X B C D */
40537 ix86_expand_vector_set (false, tmp, val, 0);
40538 /* target = A B X D */
40539 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40540 const0_rtx, const1_rtx,
40541 GEN_INT (0+4), GEN_INT (3+4)));
40542 return;
40544 case 3:
40545 /* tmp = target = A B C D */
40546 tmp = copy_to_reg (target);
40547 /* tmp = X B C D */
40548 ix86_expand_vector_set (false, tmp, val, 0);
40549 /* target = A B X D */
40550 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40551 const0_rtx, const1_rtx,
40552 GEN_INT (2+4), GEN_INT (0+4)));
40553 return;
40555 default:
40556 gcc_unreachable ();
40558 break;
40560 case V4SImode:
40561 use_vec_merge = TARGET_SSE4_1;
40562 if (use_vec_merge)
40563 break;
40565 /* Element 0 handled by vec_merge below. */
40566 if (elt == 0)
40568 use_vec_merge = true;
40569 break;
40572 if (TARGET_SSE2)
40574 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40575 store into element 0, then shuffle them back. */
40577 rtx order[4];
40579 order[0] = GEN_INT (elt);
40580 order[1] = const1_rtx;
40581 order[2] = const2_rtx;
40582 order[3] = GEN_INT (3);
40583 order[elt] = const0_rtx;
40585 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40586 order[1], order[2], order[3]));
40588 ix86_expand_vector_set (false, target, val, 0);
40590 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40591 order[1], order[2], order[3]));
40593 else
40595 /* For SSE1, we have to reuse the V4SF code. */
40596 rtx t = gen_reg_rtx (V4SFmode);
40597 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40598 emit_move_insn (target, gen_lowpart (mode, t));
40600 return;
40602 case V8HImode:
40603 use_vec_merge = TARGET_SSE2;
40604 break;
40605 case V4HImode:
40606 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40607 break;
40609 case V16QImode:
40610 use_vec_merge = TARGET_SSE4_1;
40611 break;
40613 case V8QImode:
40614 break;
40616 case V32QImode:
40617 half_mode = V16QImode;
40618 j = 0;
40619 n = 16;
40620 goto half;
40622 case V16HImode:
40623 half_mode = V8HImode;
40624 j = 1;
40625 n = 8;
40626 goto half;
40628 case V8SImode:
40629 half_mode = V4SImode;
40630 j = 2;
40631 n = 4;
40632 goto half;
40634 case V4DImode:
40635 half_mode = V2DImode;
40636 j = 3;
40637 n = 2;
40638 goto half;
40640 case V8SFmode:
40641 half_mode = V4SFmode;
40642 j = 4;
40643 n = 4;
40644 goto half;
40646 case V4DFmode:
40647 half_mode = V2DFmode;
40648 j = 5;
40649 n = 2;
40650 goto half;
40652 half:
40653 /* Compute offset. */
40654 i = elt / n;
40655 elt %= n;
40657 gcc_assert (i <= 1);
40659 /* Extract the half. */
40660 tmp = gen_reg_rtx (half_mode);
40661 emit_insn (gen_extract[j][i] (tmp, target));
40663 /* Put val in tmp at elt. */
40664 ix86_expand_vector_set (false, tmp, val, elt);
40666 /* Put it back. */
40667 emit_insn (gen_insert[j][i] (target, target, tmp));
40668 return;
40670 default:
40671 break;
40674 if (use_vec_merge)
40676 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40677 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40678 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40680 else
40682 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40684 emit_move_insn (mem, target);
40686 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40687 emit_move_insn (tmp, val);
40689 emit_move_insn (target, mem);
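/* Illustrative model of the memory fallback above (helper name
   invented): spill the vector to a stack slot, overwrite the one
   element, and reload the whole vector.  */
static void
set_via_stack_model (float vec[4], float val, int elt)
{
  float mem[4];
  __builtin_memcpy (mem, vec, sizeof mem);	/* emit_move_insn (mem, target) */
  mem[elt] = val;				/* store VAL into ELT's slot    */
  __builtin_memcpy (vec, mem, sizeof mem);	/* emit_move_insn (target, mem) */
}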
40693 void
40694 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40696 enum machine_mode mode = GET_MODE (vec);
40697 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40698 bool use_vec_extr = false;
40699 rtx tmp;
40701 switch (mode)
40703 case V2SImode:
40704 case V2SFmode:
40705 if (!mmx_ok)
40706 break;
40707 /* FALLTHRU */
40709 case V2DFmode:
40710 case V2DImode:
40711 use_vec_extr = true;
40712 break;
40714 case V4SFmode:
40715 use_vec_extr = TARGET_SSE4_1;
40716 if (use_vec_extr)
40717 break;
40719 switch (elt)
40721 case 0:
40722 tmp = vec;
40723 break;
40725 case 1:
40726 case 3:
40727 tmp = gen_reg_rtx (mode);
40728 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40729 GEN_INT (elt), GEN_INT (elt),
40730 GEN_INT (elt+4), GEN_INT (elt+4)));
40731 break;
40733 case 2:
40734 tmp = gen_reg_rtx (mode);
40735 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40736 break;
40738 default:
40739 gcc_unreachable ();
40741 vec = tmp;
40742 use_vec_extr = true;
40743 elt = 0;
40744 break;
40746 case V4SImode:
40747 use_vec_extr = TARGET_SSE4_1;
40748 if (use_vec_extr)
40749 break;
40751 if (TARGET_SSE2)
40753 switch (elt)
40755 case 0:
40756 tmp = vec;
40757 break;
40759 case 1:
40760 case 3:
40761 tmp = gen_reg_rtx (mode);
40762 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40763 GEN_INT (elt), GEN_INT (elt),
40764 GEN_INT (elt), GEN_INT (elt)));
40765 break;
40767 case 2:
40768 tmp = gen_reg_rtx (mode);
40769 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40770 break;
40772 default:
40773 gcc_unreachable ();
40775 vec = tmp;
40776 use_vec_extr = true;
40777 elt = 0;
40779 else
40781 /* For SSE1, we have to reuse the V4SF code. */
40782 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40783 gen_lowpart (V4SFmode, vec), elt);
40784 return;
40786 break;
40788 case V8HImode:
40789 use_vec_extr = TARGET_SSE2;
40790 break;
40791 case V4HImode:
40792 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40793 break;
40795 case V16QImode:
40796 use_vec_extr = TARGET_SSE4_1;
40797 break;
40799 case V8SFmode:
40800 if (TARGET_AVX)
40802 tmp = gen_reg_rtx (V4SFmode);
40803 if (elt < 4)
40804 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40805 else
40806 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40807 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40808 return;
40810 break;
40812 case V4DFmode:
40813 if (TARGET_AVX)
40815 tmp = gen_reg_rtx (V2DFmode);
40816 if (elt < 2)
40817 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40818 else
40819 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40820 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40821 return;
40823 break;
40825 case V32QImode:
40826 if (TARGET_AVX)
40828 tmp = gen_reg_rtx (V16QImode);
40829 if (elt < 16)
40830 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40831 else
40832 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40833 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40834 return;
40836 break;
40838 case V16HImode:
40839 if (TARGET_AVX)
40841 tmp = gen_reg_rtx (V8HImode);
40842 if (elt < 8)
40843 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40844 else
40845 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40846 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40847 return;
40849 break;
40851 case V8SImode:
40852 if (TARGET_AVX)
40854 tmp = gen_reg_rtx (V4SImode);
40855 if (elt < 4)
40856 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40857 else
40858 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40859 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40860 return;
40862 break;
40864 case V4DImode:
40865 if (TARGET_AVX)
40867 tmp = gen_reg_rtx (V2DImode);
40868 if (elt < 2)
40869 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40870 else
40871 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40872 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40873 return;
40875 break;
40877 case V16SFmode:
40878 tmp = gen_reg_rtx (V8SFmode);
40879 if (elt < 8)
40880 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40881 else
40882 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40883 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40884 return;
40886 case V8DFmode:
40887 tmp = gen_reg_rtx (V4DFmode);
40888 if (elt < 4)
40889 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40890 else
40891 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40892 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40893 return;
40895 case V16SImode:
40896 tmp = gen_reg_rtx (V8SImode);
40897 if (elt < 8)
40898 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40899 else
40900 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40901 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40902 return;
40904 case V8DImode:
40905 tmp = gen_reg_rtx (V4DImode);
40906 if (elt < 4)
40907 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40908 else
40909 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40910 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40911 return;
40913 case V8QImode:
40914 /* ??? Could extract the appropriate HImode element and shift. */
40915 default:
40916 break;
40919 if (use_vec_extr)
40921 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40922 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40924 /* Let the rtl optimizers know about the zero extension performed. */
40925 if (inner_mode == QImode || inner_mode == HImode)
40927 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40928 target = gen_lowpart (SImode, target);
40931 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40933 else
40935 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40937 emit_move_insn (mem, vec);
40939 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40940 emit_move_insn (target, tmp);
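/* Illustrative model of the pshufd cases in the V4SImode path above
   (helper name invented): a shuffle whose selectors all equal ELT
   broadcasts that element to every lane, so extracting lane 0 of the
   shuffled copy yields it.  */
static int
extract_via_shuffle_model (const int vec[4], int elt)
{
  int tmp[4], i;
  for (i = 0; i < 4; i++)
    tmp[i] = vec[elt];		/* pshufd with all four selectors == ELT */
  return tmp[0];		/* plain lane-0 extract                   */
}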
40944 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40945 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40946 The upper bits of DEST are undefined, though they shouldn't cause
40947 exceptions (some bits from src or all zeros are ok). */
40949 static void
40950 emit_reduc_half (rtx dest, rtx src, int i)
40952 rtx tem, d = dest;
40953 switch (GET_MODE (src))
40955 case V4SFmode:
40956 if (i == 128)
40957 tem = gen_sse_movhlps (dest, src, src);
40958 else
40959 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40960 GEN_INT (1 + 4), GEN_INT (1 + 4));
40961 break;
40962 case V2DFmode:
40963 tem = gen_vec_interleave_highv2df (dest, src, src);
40964 break;
40965 case V16QImode:
40966 case V8HImode:
40967 case V4SImode:
40968 case V2DImode:
40969 d = gen_reg_rtx (V1TImode);
40970 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40971 GEN_INT (i / 2));
40972 break;
40973 case V8SFmode:
40974 if (i == 256)
40975 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40976 else
40977 tem = gen_avx_shufps256 (dest, src, src,
40978 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40979 break;
40980 case V4DFmode:
40981 if (i == 256)
40982 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40983 else
40984 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40985 break;
40986 case V32QImode:
40987 case V16HImode:
40988 case V8SImode:
40989 case V4DImode:
40990 if (i == 256)
40992 if (GET_MODE (dest) != V4DImode)
40993 d = gen_reg_rtx (V4DImode);
40994 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
40995 gen_lowpart (V4DImode, src),
40996 const1_rtx);
40998 else
41000 d = gen_reg_rtx (V2TImode);
41001 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41002 GEN_INT (i / 2));
41004 break;
41005 case V16SImode:
41006 case V16SFmode:
41007 case V8DImode:
41008 case V8DFmode:
41009 if (i > 128)
41010 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41011 gen_lowpart (V16SImode, src),
41012 gen_lowpart (V16SImode, src),
41013 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41014 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41015 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41016 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41017 GEN_INT (0xC), GEN_INT (0xD),
41018 GEN_INT (0xE), GEN_INT (0xF),
41019 GEN_INT (0x10), GEN_INT (0x11),
41020 GEN_INT (0x12), GEN_INT (0x13),
41021 GEN_INT (0x14), GEN_INT (0x15),
41022 GEN_INT (0x16), GEN_INT (0x17));
41023 else
41024 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41025 gen_lowpart (V16SImode, src),
41026 GEN_INT (i == 128 ? 0x2 : 0x1),
41027 GEN_INT (0x3),
41028 GEN_INT (0x3),
41029 GEN_INT (0x3),
41030 GEN_INT (i == 128 ? 0x6 : 0x5),
41031 GEN_INT (0x7),
41032 GEN_INT (0x7),
41033 GEN_INT (0x7),
41034 GEN_INT (i == 128 ? 0xA : 0x9),
41035 GEN_INT (0xB),
41036 GEN_INT (0xB),
41037 GEN_INT (0xB),
41038 GEN_INT (i == 128 ? 0xE : 0xD),
41039 GEN_INT (0xF),
41040 GEN_INT (0xF),
41041 GEN_INT (0xF));
41042 break;
41043 default:
41044 gcc_unreachable ();
41046 emit_insn (tem);
41047 if (d != dest)
41048 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41051 /* Expand a vector reduction. FN is the binary pattern to reduce;
41052 DEST is the destination; IN is the input vector. */
41054 void
41055 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41057 rtx half, dst, vec = in;
41058 enum machine_mode mode = GET_MODE (in);
41059 int i;
41061 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41062 if (TARGET_SSE4_1
41063 && mode == V8HImode
41064 && fn == gen_uminv8hi3)
41066 emit_insn (gen_sse4_1_phminposuw (dest, in));
41067 return;
41070 for (i = GET_MODE_BITSIZE (mode);
41071 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41072 i >>= 1)
41074 half = gen_reg_rtx (mode);
41075 emit_reduc_half (half, vec, i);
41076 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41077 dst = dest;
41078 else
41079 dst = gen_reg_rtx (mode);
41080 emit_insn (fn (dst, half, vec));
41081 vec = dst;
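/* Illustrative scalar model of the reduction loop above (helper name
   invented; assumes NELTS is a power of two, at most 16): the live
   width is halved each iteration, combining lane i with lane i + n/2,
   until lane 0 holds the full reduction.  */
static int
reduc_model (int (*fn) (int, int), const int *in, int nelts)
{
  int vec[16], half[16];
  int i, n;
  __builtin_memcpy (vec, in, nelts * sizeof (int));
  for (n = nelts; n > 1; n >>= 1)
    for (i = 0; i < n / 2; i++)
      {
	half[i] = vec[i + n / 2];	/* emit_reduc_half      */
	vec[i] = fn (half[i], vec[i]);	/* emit_insn (fn (...)) */
      }
  return vec[0];
}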
41085 /* Target hook for scalar_mode_supported_p. */
41086 static bool
41087 ix86_scalar_mode_supported_p (enum machine_mode mode)
41089 if (DECIMAL_FLOAT_MODE_P (mode))
41090 return default_decimal_float_supported_p ();
41091 else if (mode == TFmode)
41092 return true;
41093 else
41094 return default_scalar_mode_supported_p (mode);
41097 /* Implements target hook vector_mode_supported_p. */
41098 static bool
41099 ix86_vector_mode_supported_p (enum machine_mode mode)
41101 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41102 return true;
41103 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41104 return true;
41105 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41106 return true;
41107 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41108 return true;
41109 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41110 return true;
41111 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41112 return true;
41113 return false;
41116 /* Target hook for c_mode_for_suffix. */
41117 static enum machine_mode
41118 ix86_c_mode_for_suffix (char suffix)
41120 if (suffix == 'q')
41121 return TFmode;
41122 if (suffix == 'w')
41123 return XFmode;
41125 return VOIDmode;
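/* User-level sketch of the two suffixes this hook enables (guarded out
   because it needs an x86 target compiler, not the host compiler that
   builds this file):  */
#if 0
__float128 q_literal (void) { return 1.0q; }	/* 'q' -> TFmode */
__float80  w_literal (void) { return 1.0w; }	/* 'w' -> XFmode */
#endif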
41128 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41130 We do this in the new i386 backend to maintain source compatibility
41131 with the old cc0-based compiler. */
41133 static tree
41134 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41135 tree inputs ATTRIBUTE_UNUSED,
41136 tree clobbers)
41138 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41139 clobbers);
41140 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41141 clobbers);
41142 return clobbers;
41145 /* Implements target vector targetm.asm.encode_section_info. */
41147 static void ATTRIBUTE_UNUSED
41148 ix86_encode_section_info (tree decl, rtx rtl, int first)
41150 default_encode_section_info (decl, rtl, first);
41152 if (TREE_CODE (decl) == VAR_DECL
41153 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41154 && ix86_in_large_data_p (decl))
41155 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41158 /* Worker function for REVERSE_CONDITION. */
41160 enum rtx_code
41161 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41163 return (mode != CCFPmode && mode != CCFPUmode
41164 ? reverse_condition (code)
41165 : reverse_condition_maybe_unordered (code));
41168 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41169 to OPERANDS[0]. */
41171 const char *
41172 output_387_reg_move (rtx insn, rtx *operands)
41174 if (REG_P (operands[0]))
41176 if (REG_P (operands[1])
41177 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41179 if (REGNO (operands[0]) == FIRST_STACK_REG)
41180 return output_387_ffreep (operands, 0);
41181 return "fstp\t%y0";
41183 if (STACK_TOP_P (operands[0]))
41184 return "fld%Z1\t%y1";
41185 return "fst\t%y0";
41187 else if (MEM_P (operands[0]))
41189 gcc_assert (REG_P (operands[1]));
41190 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41191 return "fstp%Z0\t%y0";
41192 else
41194 /* There is no non-popping store to memory for XFmode.
41195 So if we need one, follow the store with a load. */
41196 if (GET_MODE (operands[0]) == XFmode)
41197 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41198 else
41199 return "fst%Z0\t%y0";
41202 else
41203 gcc_unreachable();
41206 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
41207 the FP status register is set. */
41209 void
41210 ix86_emit_fp_unordered_jump (rtx label)
41212 rtx reg = gen_reg_rtx (HImode);
41213 rtx temp;
41215 emit_insn (gen_x86_fnstsw_1 (reg));
41217 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41219 emit_insn (gen_x86_sahf_1 (reg));
41221 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41222 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41224 else
41226 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41228 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41229 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41232 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41233 gen_rtx_LABEL_REF (VOIDmode, label),
41234 pc_rtx);
41235 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41237 emit_jump_insn (temp);
41238 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41241 /* Output code to perform a log1p XFmode calculation. */
41243 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41245 rtx label1 = gen_label_rtx ();
41246 rtx label2 = gen_label_rtx ();
41248 rtx tmp = gen_reg_rtx (XFmode);
41249 rtx tmp2 = gen_reg_rtx (XFmode);
41250 rtx test;
41252 emit_insn (gen_absxf2 (tmp, op1));
41253 test = gen_rtx_GE (VOIDmode, tmp,
41254 CONST_DOUBLE_FROM_REAL_VALUE (
41255 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41256 XFmode));
41257 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41259 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41260 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41261 emit_jump (label2);
41263 emit_label (label1);
41264 emit_move_insn (tmp, CONST1_RTX (XFmode));
41265 emit_insn (gen_addxf3 (tmp, op1, tmp));
41266 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41267 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41269 emit_label (label2);
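/* Rough scalar model of the branch above (helper name invented, not
   the emitted x87 sequence): fyl2xp1 takes its argument directly and
   is only specified for |x| < 1 - sqrt(2)/2 ~= 0.2928..., so larger
   inputs go through an explicit 1 + x and fyl2x instead.  */
static double
i387_log1p_model (double x)
{
  if (__builtin_fabs (x) < 0.29289321881345247561810596348408353)
    return __builtin_log1p (x);		/* fldln2; fyl2xp1 with x      */
  return __builtin_log (1.0 + x);	/* fldln2; fyl2x with 1.0 + x  */
}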
41272 /* Emit code for round calculation. */
41273 void ix86_emit_i387_round (rtx op0, rtx op1)
41275 enum machine_mode inmode = GET_MODE (op1);
41276 enum machine_mode outmode = GET_MODE (op0);
41277 rtx e1, e2, res, tmp, tmp1, half;
41278 rtx scratch = gen_reg_rtx (HImode);
41279 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41280 rtx jump_label = gen_label_rtx ();
41281 rtx insn;
41282 rtx (*gen_abs) (rtx, rtx);
41283 rtx (*gen_neg) (rtx, rtx);
41285 switch (inmode)
41287 case SFmode:
41288 gen_abs = gen_abssf2;
41289 break;
41290 case DFmode:
41291 gen_abs = gen_absdf2;
41292 break;
41293 case XFmode:
41294 gen_abs = gen_absxf2;
41295 break;
41296 default:
41297 gcc_unreachable ();
41300 switch (outmode)
41302 case SFmode:
41303 gen_neg = gen_negsf2;
41304 break;
41305 case DFmode:
41306 gen_neg = gen_negdf2;
41307 break;
41308 case XFmode:
41309 gen_neg = gen_negxf2;
41310 break;
41311 case HImode:
41312 gen_neg = gen_neghi2;
41313 break;
41314 case SImode:
41315 gen_neg = gen_negsi2;
41316 break;
41317 case DImode:
41318 gen_neg = gen_negdi2;
41319 break;
41320 default:
41321 gcc_unreachable ();
41324 e1 = gen_reg_rtx (inmode);
41325 e2 = gen_reg_rtx (inmode);
41326 res = gen_reg_rtx (outmode);
41328 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41330 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41332 /* scratch = fxam(op1) */
41333 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41334 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41335 UNSPEC_FXAM)));
41336 /* e1 = fabs(op1) */
41337 emit_insn (gen_abs (e1, op1));
41339 /* e2 = e1 + 0.5 */
41340 half = force_reg (inmode, half);
41341 emit_insn (gen_rtx_SET (VOIDmode, e2,
41342 gen_rtx_PLUS (inmode, e1, half)));
41344 /* res = floor(e2) */
41345 if (inmode != XFmode)
41347 tmp1 = gen_reg_rtx (XFmode);
41349 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41350 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41352 else
41353 tmp1 = e2;
41355 switch (outmode)
41357 case SFmode:
41358 case DFmode:
41360 rtx tmp0 = gen_reg_rtx (XFmode);
41362 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41364 emit_insn (gen_rtx_SET (VOIDmode, res,
41365 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41366 UNSPEC_TRUNC_NOOP)));
41368 break;
41369 case XFmode:
41370 emit_insn (gen_frndintxf2_floor (res, tmp1));
41371 break;
41372 case HImode:
41373 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41374 break;
41375 case SImode:
41376 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41377 break;
41378 case DImode:
41379 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41380 break;
41381 default:
41382 gcc_unreachable ();
41385 /* flags = signbit(a) */
41386 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41388 /* if (flags) then res = -res */
41389 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41390 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41391 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41392 pc_rtx);
41393 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41394 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41395 JUMP_LABEL (insn) = jump_label;
41397 emit_insn (gen_neg (res, res));
41399 emit_label (jump_label);
41400 LABEL_NUSES (jump_label) = 1;
41402 emit_move_insn (op0, res);
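/* Scalar model of the sequence built above (helper name invented;
   shown for an FP OUTMODE): round half away from zero via
   floor (|a| + 0.5), then reapply the sign taken from the fxam
   signbit test.  */
static double
i387_round_model (double a)
{
  double r = __builtin_floor (__builtin_fabs (a) + 0.5);
  return __builtin_signbit (a) ? -r : r;
}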
41405 /* Output code to perform a Newton-Raphson approximation of a single precision
41406 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41408 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41410 rtx x0, x1, e0, e1;
41412 x0 = gen_reg_rtx (mode);
41413 e0 = gen_reg_rtx (mode);
41414 e1 = gen_reg_rtx (mode);
41415 x1 = gen_reg_rtx (mode);
41417 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41419 b = force_reg (mode, b);
41421 /* x0 = rcp(b) estimate */
41422 if (mode == V16SFmode || mode == V8DFmode)
41423 emit_insn (gen_rtx_SET (VOIDmode, x0,
41424 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41425 UNSPEC_RCP14)));
41426 else
41427 emit_insn (gen_rtx_SET (VOIDmode, x0,
41428 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41429 UNSPEC_RCP)));
41431 /* e0 = x0 * b */
41432 emit_insn (gen_rtx_SET (VOIDmode, e0,
41433 gen_rtx_MULT (mode, x0, b)));
41435 /* e0 = x0 * e0 */
41436 emit_insn (gen_rtx_SET (VOIDmode, e0,
41437 gen_rtx_MULT (mode, x0, e0)));
41439 /* e1 = x0 + x0 */
41440 emit_insn (gen_rtx_SET (VOIDmode, e1,
41441 gen_rtx_PLUS (mode, x0, x0)));
41443 /* x1 = e1 - e0 */
41444 emit_insn (gen_rtx_SET (VOIDmode, x1,
41445 gen_rtx_MINUS (mode, e1, e0)));
41447 /* res = a * x1 */
41448 emit_insn (gen_rtx_SET (VOIDmode, res,
41449 gen_rtx_MULT (mode, a, x1)));
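/* Scalar model of the refinement above (helper name invented): one
   Newton-Raphson step on the rcp estimate x0 ~ 1/b, computing
   x1 = x0 * (2 - b*x0) in the form (x0 + x0) - (b * x0 * x0).  */
static float
swdiv_model (float a, float b, float x0)
{
  float e0, e1, x1;
  e0 = x0 * b;
  e0 = x0 * e0;		/* b * x0 * x0  */
  e1 = x0 + x0;		/* 2 * x0       */
  x1 = e1 - e0;		/* refined 1/b  */
  return a * x1;
}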
41452 /* Output code to perform a Newton-Raphson approximation of a
41453 single precision floating point [reciprocal] square root. */
41455 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41456 bool recip)
41458 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41459 REAL_VALUE_TYPE r;
41460 int unspec;
41462 x0 = gen_reg_rtx (mode);
41463 e0 = gen_reg_rtx (mode);
41464 e1 = gen_reg_rtx (mode);
41465 e2 = gen_reg_rtx (mode);
41466 e3 = gen_reg_rtx (mode);
41468 real_from_integer (&r, VOIDmode, -3, -1, 0);
41469 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41471 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41472 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41473 unspec = UNSPEC_RSQRT;
41475 if (VECTOR_MODE_P (mode))
41477 mthree = ix86_build_const_vector (mode, true, mthree);
41478 mhalf = ix86_build_const_vector (mode, true, mhalf);
41479 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41480 if (GET_MODE_SIZE (mode) == 64)
41481 unspec = UNSPEC_RSQRT14;
41484 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41485 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41487 a = force_reg (mode, a);
41489 /* x0 = rsqrt(a) estimate */
41490 emit_insn (gen_rtx_SET (VOIDmode, x0,
41491 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41492 unspec)));
41494 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
41495 if (!recip)
41497 rtx zero, mask;
41499 zero = gen_reg_rtx (mode);
41500 mask = gen_reg_rtx (mode);
41502 zero = force_reg (mode, CONST0_RTX(mode));
41504 /* Handle masked compare. */
41505 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41507 mask = gen_reg_rtx (HImode);
41508 /* Imm value 0x4 corresponds to not-equal comparison. */
41509 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41510 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41512 else
41514 emit_insn (gen_rtx_SET (VOIDmode, mask,
41515 gen_rtx_NE (mode, zero, a)));
41517 emit_insn (gen_rtx_SET (VOIDmode, x0,
41518 gen_rtx_AND (mode, x0, mask)));
41522 /* e0 = x0 * a */
41523 emit_insn (gen_rtx_SET (VOIDmode, e0,
41524 gen_rtx_MULT (mode, x0, a)));
41525 /* e1 = e0 * x0 */
41526 emit_insn (gen_rtx_SET (VOIDmode, e1,
41527 gen_rtx_MULT (mode, e0, x0)));
41529 /* e2 = e1 - 3. */
41530 mthree = force_reg (mode, mthree);
41531 emit_insn (gen_rtx_SET (VOIDmode, e2,
41532 gen_rtx_PLUS (mode, e1, mthree)));
41534 mhalf = force_reg (mode, mhalf);
41535 if (recip)
41536 /* e3 = -.5 * x0 */
41537 emit_insn (gen_rtx_SET (VOIDmode, e3,
41538 gen_rtx_MULT (mode, x0, mhalf)));
41539 else
41540 /* e3 = -.5 * e0 */
41541 emit_insn (gen_rtx_SET (VOIDmode, e3,
41542 gen_rtx_MULT (mode, e0, mhalf)));
41543 /* ret = e2 * e3 */
41544 emit_insn (gen_rtx_SET (VOIDmode, res,
41545 gen_rtx_MULT (mode, e2, e3)));
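/* Scalar model of the sequence above (helper name invented; the a == 0
   masking is omitted): one Newton-Raphson step on the rsqrt estimate
   x0 ~ 1/sqrt(a), per the formulas in the comment above.  */
static float
swsqrt_model (float a, float x0, int recip)
{
  float e0, e1, e2, e3;
  e0 = x0 * a;			/* a * x0                   */
  e1 = e0 * x0;			/* a * x0 * x0              */
  e2 = e1 - 3.0f;		/* PLUS with mthree (-3.0)  */
  e3 = (recip ? x0 : e0) * (-0.5f);
  return e2 * e3;
}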
41548 #ifdef TARGET_SOLARIS
41549 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41551 static void
41552 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41553 tree decl)
41555 /* With Binutils 2.15, the "@unwind" marker must be specified on
41556 every occurrence of the ".eh_frame" section, not just the first
41557 one. */
41558 if (TARGET_64BIT
41559 && strcmp (name, ".eh_frame") == 0)
41561 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41562 flags & SECTION_WRITE ? "aw" : "a");
41563 return;
41566 #ifndef USE_GAS
41567 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41569 solaris_elf_asm_comdat_section (name, flags, decl);
41570 return;
41572 #endif
41574 default_elf_asm_named_section (name, flags, decl);
41576 #endif /* TARGET_SOLARIS */
41578 /* Return the mangling of TYPE if it is an extended fundamental type. */
41580 static const char *
41581 ix86_mangle_type (const_tree type)
41583 type = TYPE_MAIN_VARIANT (type);
41585 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41586 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41587 return NULL;
41589 switch (TYPE_MODE (type))
41591 case TFmode:
41592 /* __float128 is "g". */
41593 return "g";
41594 case XFmode:
41595 /* "long double" or __float80 is "e". */
41596 return "e";
41597 default:
41598 return NULL;
41602 /* For 32-bit code we can save PIC register setup by using
41603 the __stack_chk_fail_local hidden function instead of calling
41604 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41605 register, so it is better to call __stack_chk_fail directly. */
41607 static tree ATTRIBUTE_UNUSED
41608 ix86_stack_protect_fail (void)
41610 return TARGET_64BIT
41611 ? default_external_stack_protect_fail ()
41612 : default_hidden_stack_protect_fail ();
41615 /* Select a format to encode pointers in exception handling data. CODE
41616 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41617 true if the symbol may be affected by dynamic relocations.
41619 ??? All x86 object file formats are capable of representing this.
41620 After all, the relocation needed is the same as for the call insn.
41621 Whether or not a particular assembler allows us to enter such, I
41622 guess we'll have to see. */
41624 int asm_preferred_eh_data_format (int code, int global)
41626 if (flag_pic)
41628 int type = DW_EH_PE_sdata8;
41629 if (!TARGET_64BIT
41630 || ix86_cmodel == CM_SMALL_PIC
41631 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41632 type = DW_EH_PE_sdata4;
41633 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41635 if (ix86_cmodel == CM_SMALL
41636 || (ix86_cmodel == CM_MEDIUM && code))
41637 return DW_EH_PE_udata4;
41638 return DW_EH_PE_absptr;
41641 /* Expand copysign from SIGN to the positive value ABS_VALUE
41642 storing in RESULT. If MASK is non-null, it is the mask used to clear
41643 the sign bit. */
41644 static void
41645 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41647 enum machine_mode mode = GET_MODE (sign);
41648 rtx sgn = gen_reg_rtx (mode);
41649 if (mask == NULL_RTX)
41651 enum machine_mode vmode;
41653 if (mode == SFmode)
41654 vmode = V4SFmode;
41655 else if (mode == DFmode)
41656 vmode = V2DFmode;
41657 else
41658 vmode = mode;
41660 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41661 if (!VECTOR_MODE_P (mode))
41663 /* We need to generate a scalar mode mask in this case. */
41664 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41665 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41666 mask = gen_reg_rtx (mode);
41667 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41670 else
41671 mask = gen_rtx_NOT (mode, mask);
41672 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41673 gen_rtx_AND (mode, mask, sign)));
41674 emit_insn (gen_rtx_SET (VOIDmode, result,
41675 gen_rtx_IOR (mode, abs_value, sgn)));
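/* Bit-level scalar model of the copysign step above (helper name
   invented; assumes 32-bit float and unsigned int): AND the sign
   operand with the sign-bit mask, then OR the extracted bit into the
   already-nonnegative ABS_VALUE.  */
static float
copysign_to_positive_model (float abs_value, float sign)
{
  unsigned int a, s;
  float r;
  __builtin_memcpy (&a, &abs_value, sizeof a);
  __builtin_memcpy (&s, &sign, sizeof s);
  a |= s & 0x80000000u;		/* sgn = mask & sign; result = abs | sgn */
  __builtin_memcpy (&r, &a, sizeof r);
  return r;
}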
41678 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41679 mask for masking out the sign-bit is stored in *SMASK, if that is
41680 non-null. */
41681 static rtx
41682 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41684 enum machine_mode vmode, mode = GET_MODE (op0);
41685 rtx xa, mask;
41687 xa = gen_reg_rtx (mode);
41688 if (mode == SFmode)
41689 vmode = V4SFmode;
41690 else if (mode == DFmode)
41691 vmode = V2DFmode;
41692 else
41693 vmode = mode;
41694 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41695 if (!VECTOR_MODE_P (mode))
41697 /* We need to generate a scalar mode mask in this case. */
41698 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41699 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41700 mask = gen_reg_rtx (mode);
41701 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41703 emit_insn (gen_rtx_SET (VOIDmode, xa,
41704 gen_rtx_AND (mode, op0, mask)));
41706 if (smask)
41707 *smask = mask;
41709 return xa;
41712 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41713 swapping the operands if SWAP_OPERANDS is true. The expanded
41714 code is a forward jump to a newly created label in case the
41715 comparison is true. The generated label rtx is returned. */
41716 static rtx
41717 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41718 bool swap_operands)
41720 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41721 rtx label, tmp;
41723 if (swap_operands)
41725 tmp = op0;
41726 op0 = op1;
41727 op1 = tmp;
41730 label = gen_label_rtx ();
41731 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41732 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41733 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41734 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41735 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41736 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41737 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41738 JUMP_LABEL (tmp) = label;
41740 return label;
41743 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41744 using comparison code CODE. Operands are swapped for the comparison if
41745 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41746 static rtx
41747 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41748 bool swap_operands)
41750 rtx (*insn)(rtx, rtx, rtx, rtx);
41751 enum machine_mode mode = GET_MODE (op0);
41752 rtx mask = gen_reg_rtx (mode);
41754 if (swap_operands)
41756 rtx tmp = op0;
41757 op0 = op1;
41758 op1 = tmp;
41761 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41763 emit_insn (insn (mask, op0, op1,
41764 gen_rtx_fmt_ee (code, mode, op0, op1)));
41765 return mask;
41768 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41769 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41770 static rtx
41771 ix86_gen_TWO52 (enum machine_mode mode)
41773 REAL_VALUE_TYPE TWO52r;
41774 rtx TWO52;
41776 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41777 TWO52 = const_double_from_real_value (TWO52r, mode);
41778 TWO52 = force_reg (mode, TWO52);
41780 return TWO52;
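/* Illustrative model of how this constant is used by the expanders
   below (helper name invented): for 0 <= x < 2**52 the sum x + 2**52
   has no fractional bits, so adding and then subtracting 2**52 rounds
   x to an integer in the current rounding mode without any integer
   conversion.  2**23 plays the same role for SFmode.  */
static double
round_via_two52_model (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  return (x + two52) - two52;
}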
41783 /* Expand SSE sequence for computing lround from OP1 storing
41784 into OP0. */
41785 void
41786 ix86_expand_lround (rtx op0, rtx op1)
41788 /* C code for the stuff we're doing below:
41789 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41790 return (long)tmp;
41792 enum machine_mode mode = GET_MODE (op1);
41793 const struct real_format *fmt;
41794 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41795 rtx adj;
41797 /* load nextafter (0.5, 0.0) */
41798 fmt = REAL_MODE_FORMAT (mode);
41799 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41800 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41802 /* adj = copysign (0.5, op1) */
41803 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41804 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41806 /* adj = op1 + adj */
41807 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41809 /* op0 = (imode)adj */
41810 expand_fix (op0, adj, 0);
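/* A plain-C check (helper name invented; assumes round-to-nearest-even)
   of why the expander adds nextafter (0.5, 0.0) rather than 0.5: for
   the largest double below 0.5, adding exactly 0.5 rounds up to 1.0,
   so a naive (long)(x + 0.5) would return 1 where lround must give 0.  */
static long
lround_half_check (void)
{
  double x = 0.49999999999999994;		   /* nextafter (0.5, 0.0) */
  long naive = (long) (x + 0.5);		   /* 1 -- wrong           */
  long fixed = (long) (x + 0.49999999999999994);   /* 0 -- matches lround  */
  return fixed - naive;
}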
41813 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41814 into OPERAND0. */
41815 void
41816 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41818 /* C code for the stuff we're doing below (for do_floor):
41819 xi = (long)op1;
41820 xi -= (double)xi > op1 ? 1 : 0;
41821 return xi;
41823 enum machine_mode fmode = GET_MODE (op1);
41824 enum machine_mode imode = GET_MODE (op0);
41825 rtx ireg, freg, label, tmp;
41827 /* reg = (long)op1 */
41828 ireg = gen_reg_rtx (imode);
41829 expand_fix (ireg, op1, 0);
41831 /* freg = (double)reg */
41832 freg = gen_reg_rtx (fmode);
41833 expand_float (freg, ireg, 0);
41835 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41836 label = ix86_expand_sse_compare_and_jump (UNLE,
41837 freg, op1, !do_floor);
41838 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41839 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41840 emit_move_insn (ireg, tmp);
41842 emit_label (label);
41843 LABEL_NUSES (label) = 1;
41845 emit_move_insn (op0, ireg);
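/* Scalar model of the do_floor case above (helper name invented):
   truncate toward zero, then subtract 1 whenever the truncated value
   exceeds the input, i.e. for negative inputs with a fractional
   part.  */
static long
lfloor_model (double x)
{
  long xi = (long) x;		/* expand_fix: truncation toward zero */
  if ((double) xi > x)		/* the UNLE branch was not taken      */
    xi -= 1;
  return xi;
}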
41848 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41849 result in OPERAND0. */
41850 void
41851 ix86_expand_rint (rtx operand0, rtx operand1)
41853 /* C code for the stuff we're doing below:
41854 xa = fabs (operand1);
41855 if (!isless (xa, 2**52))
41856 return operand1;
41857 xa = xa + 2**52 - 2**52;
41858 return copysign (xa, operand1);
41860 enum machine_mode mode = GET_MODE (operand0);
41861 rtx res, xa, label, TWO52, mask;
41863 res = gen_reg_rtx (mode);
41864 emit_move_insn (res, operand1);
41866 /* xa = abs (operand1) */
41867 xa = ix86_expand_sse_fabs (res, &mask);
41869 /* if (!isless (xa, TWO52)) goto label; */
41870 TWO52 = ix86_gen_TWO52 (mode);
41871 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41873 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41874 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41876 ix86_sse_copysign_to_positive (res, xa, res, mask);
41878 emit_label (label);
41879 LABEL_NUSES (label) = 1;
41881 emit_move_insn (operand0, res);
41884 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41885 into OPERAND0. */
41886 void
41887 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41889 /* C code for the stuff we expand below.
41890 double xa = fabs (x), x2;
41891 if (!isless (xa, TWO52))
41892 return x;
41893 xa = xa + TWO52 - TWO52;
41894 x2 = copysign (xa, x);
41895 Compensate. Floor:
41896 if (x2 > x)
41897 x2 -= 1;
41898 Compensate. Ceil:
41899 if (x2 < x)
41900 x2 -= -1;
41901 return x2;
41903 enum machine_mode mode = GET_MODE (operand0);
41904 rtx xa, TWO52, tmp, label, one, res, mask;
41906 TWO52 = ix86_gen_TWO52 (mode);
41908 /* Temporary for holding the result, initialized to the input
41909 operand to ease control flow. */
41910 res = gen_reg_rtx (mode);
41911 emit_move_insn (res, operand1);
41913 /* xa = abs (operand1) */
41914 xa = ix86_expand_sse_fabs (res, &mask);
41916 /* if (!isless (xa, TWO52)) goto label; */
41917 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41919 /* xa = xa + TWO52 - TWO52; */
41920 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41921 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41923 /* xa = copysign (xa, operand1) */
41924 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41926 /* generate 1.0 or -1.0 */
41927 one = force_reg (mode,
41928 const_double_from_real_value (do_floor
41929 ? dconst1 : dconstm1, mode));
41931 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41932 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41933 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41934 gen_rtx_AND (mode, one, tmp)));
41935 /* We always need to subtract here to preserve signed zero. */
41936 tmp = expand_simple_binop (mode, MINUS,
41937 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41938 emit_move_insn (res, tmp);
41940 emit_label (label);
41941 LABEL_NUSES (label) = 1;
41943 emit_move_insn (operand0, res);
41946 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41947 into OPERAND0. */
41948 void
41949 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41951 /* C code for the stuff we expand below.
41952 double xa = fabs (x), x2;
41953 if (!isless (xa, TWO52))
41954 return x;
41955 x2 = (double)(long)x;
41956 Compensate. Floor:
41957 if (x2 > x)
41958 x2 -= 1;
41959 Compensate. Ceil:
41960 if (x2 < x)
41961 x2 += 1;
41962 if (HONOR_SIGNED_ZEROS (mode))
41963 return copysign (x2, x);
41964 return x2;
41966 enum machine_mode mode = GET_MODE (operand0);
41967 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41969 TWO52 = ix86_gen_TWO52 (mode);
41971 /* Temporary for holding the result, initialized to the input
41972 operand to ease control flow. */
41973 res = gen_reg_rtx (mode);
41974 emit_move_insn (res, operand1);
41976 /* xa = abs (operand1) */
41977 xa = ix86_expand_sse_fabs (res, &mask);
41979 /* if (!isless (xa, TWO52)) goto label; */
41980 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41982 /* xa = (double)(long)x */
41983 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41984 expand_fix (xi, res, 0);
41985 expand_float (xa, xi, 0);
41987 /* generate 1.0 */
41988 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41990 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41991 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41992 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41993 gen_rtx_AND (mode, one, tmp)));
41994 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
41995 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41996 emit_move_insn (res, tmp);
41998 if (HONOR_SIGNED_ZEROS (mode))
41999 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42001 emit_label (label);
42002 LABEL_NUSES (label) = 1;
42004 emit_move_insn (operand0, res);
42007 /* Expand SSE sequence for computing round from OPERAND1 storing
42008 into OPERAND0. Sequence that works without relying on DImode truncation
42009 via cvttsd2siq, which is only available on 64-bit targets. */
42010 void
42011 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42013 /* C code for the stuff we expand below.
42014 double xa = fabs (x), xa2, x2;
42015 if (!isless (xa, TWO52))
42016 return x;
42017 Using the absolute value and copying back sign makes
42018 -0.0 -> -0.0 correct.
42019 xa2 = xa + TWO52 - TWO52;
42020 Compensate.
42021 dxa = xa2 - xa;
42022 if (dxa <= -0.5)
42023 xa2 += 1;
42024 else if (dxa > 0.5)
42025 xa2 -= 1;
42026 x2 = copysign (xa2, x);
42027 return x2;
42029 enum machine_mode mode = GET_MODE (operand0);
42030 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42032 TWO52 = ix86_gen_TWO52 (mode);
42034 /* Temporary for holding the result, initialized to the input
42035 operand to ease control flow. */
42036 res = gen_reg_rtx (mode);
42037 emit_move_insn (res, operand1);
42039 /* xa = abs (operand1) */
42040 xa = ix86_expand_sse_fabs (res, &mask);
42042 /* if (!isless (xa, TWO52)) goto label; */
42043 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42045 /* xa2 = xa + TWO52 - TWO52; */
42046 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42047 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42049 /* dxa = xa2 - xa; */
42050 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42052 /* generate 0.5, 1.0 and -0.5 */
42053 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42054 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42055 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42056 0, OPTAB_DIRECT);
42058 /* Compensate. */
42059 tmp = gen_reg_rtx (mode);
42060 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42061 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42062 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42063 gen_rtx_AND (mode, one, tmp)));
42064 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42065 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42066 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42067 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42068 gen_rtx_AND (mode, one, tmp)));
42069 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42071 /* res = copysign (xa2, operand1) */
42072 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42074 emit_label (label);
42075 LABEL_NUSES (label) = 1;
42077 emit_move_insn (operand0, res);
42080 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42081 into OPERAND0. */
42082 void
42083 ix86_expand_trunc (rtx operand0, rtx operand1)
42085 /* C code for SSE variant we expand below.
42086 double xa = fabs (x), x2;
42087 if (!isless (xa, TWO52))
42088 return x;
42089 x2 = (double)(long)x;
42090 if (HONOR_SIGNED_ZEROS (mode))
42091 return copysign (x2, x);
42092 return x2;
42094 enum machine_mode mode = GET_MODE (operand0);
42095 rtx xa, xi, TWO52, label, res, mask;
42097 TWO52 = ix86_gen_TWO52 (mode);
42099 /* Temporary for holding the result, initialized to the input
42100 operand to ease control flow. */
42101 res = gen_reg_rtx (mode);
42102 emit_move_insn (res, operand1);
42104 /* xa = abs (operand1) */
42105 xa = ix86_expand_sse_fabs (res, &mask);
42107 /* if (!isless (xa, TWO52)) goto label; */
42108 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42110 /* x = (double)(long)x */
42111 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42112 expand_fix (xi, res, 0);
42113 expand_float (res, xi, 0);
42115 if (HONOR_SIGNED_ZEROS (mode))
42116 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42118 emit_label (label);
42119 LABEL_NUSES (label) = 1;
42121 emit_move_insn (operand0, res);
42124 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42125 into OPERAND0. */
42126 void
42127 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42129 enum machine_mode mode = GET_MODE (operand0);
42130 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42132 /* C code for SSE variant we expand below.
42133 double xa = fabs (x), xa2, x2;
42134 if (!isless (xa, TWO52))
42135 return x;
42136 xa2 = xa + TWO52 - TWO52;
42137 Compensate:
42138 if (xa2 > xa)
42139 xa2 -= 1.0;
42140 x2 = copysign (xa2, x);
42141 return x2;
42144 TWO52 = ix86_gen_TWO52 (mode);
42146 /* Temporary for holding the result, initialized to the input
42147 operand to ease control flow. */
42148 res = gen_reg_rtx (mode);
42149 emit_move_insn (res, operand1);
42151 /* xa = abs (operand1) */
42152 xa = ix86_expand_sse_fabs (res, &smask);
42154 /* if (!isless (xa, TWO52)) goto label; */
42155 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42157 /* res = xa + TWO52 - TWO52; */
42158 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42159 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42160 emit_move_insn (res, tmp);
42162 /* generate 1.0 */
42163 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42165 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42166 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42167 emit_insn (gen_rtx_SET (VOIDmode, mask,
42168 gen_rtx_AND (mode, mask, one)));
42169 tmp = expand_simple_binop (mode, MINUS,
42170 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42171 emit_move_insn (res, tmp);
42173 /* res = copysign (res, operand1) */
42174 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42176 emit_label (label);
42177 LABEL_NUSES (label) = 1;
42179 emit_move_insn (operand0, res);
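/* Illustrative sketch, not part of the original file: a plain C model of the
   32-bit DFmode trunc sequence above, with hypothetical names.  The TWO52
   addition rounds to the nearest integer; the compensation subtracts 1
   whenever that rounding went up, which truncates the absolute value.  */
#if 0
static double
truncdf_32_model (double x)
{
  const double TWO52 = 4503599627370496.0;	/* 2**52 */
  double xa = __builtin_fabs (x), xa2;

  if (!(xa < TWO52))
    return x;
  xa2 = xa + TWO52 - TWO52;
  if (xa2 > xa)				/* rounded up, undo it */
    xa2 -= 1.0;
  return __builtin_copysign (xa2, x);
}
#endif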
42182 /* Expand SSE sequence for computing round from OPERAND1 storing
42183 into OPERAND0. */
42184 void
42185 ix86_expand_round (rtx operand0, rtx operand1)
42187 /* C code for the stuff we're doing below:
42188 double xa = fabs (x);
42189 if (!isless (xa, TWO52))
42190 return x;
42191 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42192 return copysign (xa, x);
42194 enum machine_mode mode = GET_MODE (operand0);
42195 rtx res, TWO52, xa, label, xi, half, mask;
42196 const struct real_format *fmt;
42197 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42199 /* Temporary for holding the result, initialized to the input
42200 operand to ease control flow. */
42201 res = gen_reg_rtx (mode);
42202 emit_move_insn (res, operand1);
42204 TWO52 = ix86_gen_TWO52 (mode);
42205 xa = ix86_expand_sse_fabs (res, &mask);
42206 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42208 /* load nextafter (0.5, 0.0) */
42209 fmt = REAL_MODE_FORMAT (mode);
42210 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42211 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42213 /* xa = xa + 0.5 */
42214 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42215 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42217 /* xa = (double)(int64_t)xa */
42218 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42219 expand_fix (xi, xa, 0);
42220 expand_float (xa, xi, 0);
42222 /* res = copysign (xa, operand1) */
42223 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42225 emit_label (label);
42226 LABEL_NUSES (label) = 1;
42228 emit_move_insn (operand0, res);
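/* Illustrative note, not part of the original file: pred_half above is
   nextafter (0.5, 0.0) = 0.5 - 2**(-p-1), i.e. 0.5 - 0x1p-54 for DFmode and
   0.5 - 0x1p-25 for SFmode.  Adding plain 0.5 would be wrong for the largest
   value below 0.5: 0.49999999999999994 + 0.5 rounds to 1.0, so round () would
   return 1.0 instead of 0.0.  With the predecessor of 0.5 the sum stays
   strictly below 1.0 and truncation gives the expected 0.0.  */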
42231 /* Expand SSE sequence for computing round
42232 from OP1 storing into OP0 using sse4 round insn. */
42233 void
42234 ix86_expand_round_sse4 (rtx op0, rtx op1)
42236 enum machine_mode mode = GET_MODE (op0);
42237 rtx e1, e2, res, half;
42238 const struct real_format *fmt;
42239 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42240 rtx (*gen_copysign) (rtx, rtx, rtx);
42241 rtx (*gen_round) (rtx, rtx, rtx);
42243 switch (mode)
42245 case SFmode:
42246 gen_copysign = gen_copysignsf3;
42247 gen_round = gen_sse4_1_roundsf2;
42248 break;
42249 case DFmode:
42250 gen_copysign = gen_copysigndf3;
42251 gen_round = gen_sse4_1_rounddf2;
42252 break;
42253 default:
42254 gcc_unreachable ();
42257 /* round (a) = trunc (a + copysign (0.5, a)) */
42259 /* load nextafter (0.5, 0.0) */
42260 fmt = REAL_MODE_FORMAT (mode);
42261 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42262 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42263 half = const_double_from_real_value (pred_half, mode);
42265 /* e1 = copysign (0.5, op1) */
42266 e1 = gen_reg_rtx (mode);
42267 emit_insn (gen_copysign (e1, half, op1));
42269 /* e2 = op1 + e1 */
42270 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42272 /* res = trunc (e2) */
42273 res = gen_reg_rtx (mode);
42274 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42276 emit_move_insn (op0, res);
42280 /* Table of valid machine attributes. */
42281 static const struct attribute_spec ix86_attribute_table[] =
42283 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42284 affects_type_identity } */
42285 /* Stdcall attribute says callee is responsible for popping arguments
42286 if they are not variable. */
42287 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42288 true },
42289 /* Fastcall attribute says callee is responsible for popping arguments
42290 if they are not variable. */
42291 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42292 true },
42293 /* Thiscall attribute says callee is responsible for popping arguments
42294 if they are not variable. */
42295 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42296 true },
42297 /* Cdecl attribute says the callee is a normal C declaration */
42298 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42299 true },
42300 /* Regparm attribute specifies how many integer arguments are to be
42301 passed in registers. */
42302 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42303 true },
42304 /* Sseregparm attribute says we are using x86_64 calling conventions
42305 for FP arguments. */
42306 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42307 true },
42308 /* The transactional memory builtins are implicitly regparm or fastcall
42309 depending on the ABI. Override the generic do-nothing attribute that
42310 these builtins were declared with. */
42311 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42312 true },
42313 /* force_align_arg_pointer says this function realigns the stack at entry. */
42314 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42315 false, true, true, ix86_handle_cconv_attribute, false },
42316 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42317 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42318 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42319 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42320 false },
42321 #endif
42322 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42323 false },
42324 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42325 false },
42326 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42327 SUBTARGET_ATTRIBUTE_TABLE,
42328 #endif
42329 /* ms_abi and sysv_abi calling convention function attributes. */
42330 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42331 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42332 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42333 false },
42334 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42335 ix86_handle_callee_pop_aggregate_return, true },
42336 /* End element. */
42337 { NULL, 0, 0, false, false, false, NULL, false }
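/* Illustrative usage, not part of the original file: how the calling
   convention attributes registered above appear in user code on ia32.
   These declarations are hypothetical and kept out of the build.  */
#if 0
/* Callee pops the stack arguments.  */
extern int __attribute__ ((stdcall)) cb (int a, int b);
/* First two integer args in %ecx and %edx.  */
extern int __attribute__ ((fastcall)) fc (int a, int b);
/* Up to three integer args in %eax, %edx and %ecx.  */
extern int __attribute__ ((regparm (3))) rp (int a, int b, int c);
/* Lay out fields the way MSVC would.  */
struct mslayout { char c; int i; } __attribute__ ((ms_struct));
#endif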
42340 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42341 static int
42342 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42343 tree vectype,
42344 int misalign ATTRIBUTE_UNUSED)
42346 unsigned elements;
42348 switch (type_of_cost)
42350 case scalar_stmt:
42351 return ix86_cost->scalar_stmt_cost;
42353 case scalar_load:
42354 return ix86_cost->scalar_load_cost;
42356 case scalar_store:
42357 return ix86_cost->scalar_store_cost;
42359 case vector_stmt:
42360 return ix86_cost->vec_stmt_cost;
42362 case vector_load:
42363 return ix86_cost->vec_align_load_cost;
42365 case vector_store:
42366 return ix86_cost->vec_store_cost;
42368 case vec_to_scalar:
42369 return ix86_cost->vec_to_scalar_cost;
42371 case scalar_to_vec:
42372 return ix86_cost->scalar_to_vec_cost;
42374 case unaligned_load:
42375 case unaligned_store:
42376 return ix86_cost->vec_unalign_load_cost;
42378 case cond_branch_taken:
42379 return ix86_cost->cond_taken_branch_cost;
42381 case cond_branch_not_taken:
42382 return ix86_cost->cond_not_taken_branch_cost;
42384 case vec_perm:
42385 case vec_promote_demote:
42386 return ix86_cost->vec_stmt_cost;
42388 case vec_construct:
42389 elements = TYPE_VECTOR_SUBPARTS (vectype);
42390 return elements / 2 + 1;
42392 default:
42393 gcc_unreachable ();
42397 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42398 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42399 insn every time. */
42401 static GTY(()) rtx vselect_insn;
42403 /* Initialize vselect_insn. */
42405 static void
42406 init_vselect_insn (void)
42408 unsigned i;
42409 rtx x;
42411 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42412 for (i = 0; i < MAX_VECT_LEN; ++i)
42413 XVECEXP (x, 0, i) = const0_rtx;
42414 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42415 const0_rtx), x);
42416 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42417 start_sequence ();
42418 vselect_insn = emit_insn (x);
42419 end_sequence ();
42422 /* Construct (set target (vec_select op0 (parallel perm))) and
42423 return true if that's a valid instruction in the active ISA. */
42425 static bool
42426 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42427 unsigned nelt, bool testing_p)
42429 unsigned int i;
42430 rtx x, save_vconcat;
42431 int icode;
42433 if (vselect_insn == NULL_RTX)
42434 init_vselect_insn ();
42436 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42437 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42438 for (i = 0; i < nelt; ++i)
42439 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42440 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42441 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42442 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42443 SET_DEST (PATTERN (vselect_insn)) = target;
42444 icode = recog_memoized (vselect_insn);
42446 if (icode >= 0 && !testing_p)
42447 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42449 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42450 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42451 INSN_CODE (vselect_insn) = -1;
42453 return icode >= 0;
42456 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42458 static bool
42459 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42460 const unsigned char *perm, unsigned nelt,
42461 bool testing_p)
42463 enum machine_mode v2mode;
42464 rtx x;
42465 bool ok;
42467 if (vselect_insn == NULL_RTX)
42468 init_vselect_insn ();
42470 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42471 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42472 PUT_MODE (x, v2mode);
42473 XEXP (x, 0) = op0;
42474 XEXP (x, 1) = op1;
42475 ok = expand_vselect (target, x, perm, nelt, testing_p);
42476 XEXP (x, 0) = const0_rtx;
42477 XEXP (x, 1) = const0_rtx;
42478 return ok;
42481 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42482 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42484 static bool
42485 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42487 enum machine_mode vmode = d->vmode;
42488 unsigned i, mask, nelt = d->nelt;
42489 rtx target, op0, op1, x;
42490 rtx rperm[32], vperm;
42492 if (d->one_operand_p)
42493 return false;
42494 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42496 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42498 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42500 else
42501 return false;
42503 /* This is a blend, not a permute. Elements must stay in their
42504 respective lanes. */
42505 for (i = 0; i < nelt; ++i)
42507 unsigned e = d->perm[i];
42508 if (!(e == i || e == i + nelt))
42509 return false;
42512 if (d->testing_p)
42513 return true;
42515 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42516 decision should be extracted elsewhere, so that we only try that
42517 sequence once all budget==3 options have been tried. */
42518 target = d->target;
42519 op0 = d->op0;
42520 op1 = d->op1;
42521 mask = 0;
42523 switch (vmode)
42525 case V4DFmode:
42526 case V8SFmode:
42527 case V2DFmode:
42528 case V4SFmode:
42529 case V8HImode:
42530 case V8SImode:
42531 for (i = 0; i < nelt; ++i)
42532 mask |= (d->perm[i] >= nelt) << i;
42533 break;
42535 case V2DImode:
42536 for (i = 0; i < 2; ++i)
42537 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42538 vmode = V8HImode;
42539 goto do_subreg;
42541 case V4SImode:
42542 for (i = 0; i < 4; ++i)
42543 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42544 vmode = V8HImode;
42545 goto do_subreg;
42547 case V16QImode:
42548 /* See if bytes move in pairs so we can use pblendw with
42549 an immediate argument, rather than pblendvb with a vector
42550 argument. */
42551 for (i = 0; i < 16; i += 2)
42552 if (d->perm[i] + 1 != d->perm[i + 1])
42554 use_pblendvb:
42555 for (i = 0; i < nelt; ++i)
42556 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42558 finish_pblendvb:
42559 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42560 vperm = force_reg (vmode, vperm);
42562 if (GET_MODE_SIZE (vmode) == 16)
42563 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42564 else
42565 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42566 if (target != d->target)
42567 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42568 return true;
42571 for (i = 0; i < 8; ++i)
42572 mask |= (d->perm[i * 2] >= 16) << i;
42573 vmode = V8HImode;
42574 /* FALLTHRU */
42576 do_subreg:
42577 target = gen_reg_rtx (vmode);
42578 op0 = gen_lowpart (vmode, op0);
42579 op1 = gen_lowpart (vmode, op1);
42580 break;
42582 case V32QImode:
42583 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42584 for (i = 0; i < 32; i += 2)
42585 if (d->perm[i] + 1 != d->perm[i + 1])
42586 goto use_pblendvb;
42587 /* See if bytes move in quadruplets. If yes, vpblendd
42588 with immediate can be used. */
42589 for (i = 0; i < 32; i += 4)
42590 if (d->perm[i] + 2 != d->perm[i + 2])
42591 break;
42592 if (i < 32)
42594 /* See if bytes move the same in both lanes. If yes,
42595 vpblendw with immediate can be used. */
42596 for (i = 0; i < 16; i += 2)
42597 if (d->perm[i] + 16 != d->perm[i + 16])
42598 goto use_pblendvb;
42600 /* Use vpblendw. */
42601 for (i = 0; i < 16; ++i)
42602 mask |= (d->perm[i * 2] >= 32) << i;
42603 vmode = V16HImode;
42604 goto do_subreg;
42607 /* Use vpblendd. */
42608 for (i = 0; i < 8; ++i)
42609 mask |= (d->perm[i * 4] >= 32) << i;
42610 vmode = V8SImode;
42611 goto do_subreg;
42613 case V16HImode:
42614 /* See if words move in pairs. If yes, vpblendd can be used. */
42615 for (i = 0; i < 16; i += 2)
42616 if (d->perm[i] + 1 != d->perm[i + 1])
42617 break;
42618 if (i < 16)
42620 /* See if words move the same in both lanes. If not,
42621 vpblendvb must be used. */
42622 for (i = 0; i < 8; i++)
42623 if (d->perm[i] + 8 != d->perm[i + 8])
42625 /* Use vpblendvb. */
42626 for (i = 0; i < 32; ++i)
42627 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42629 vmode = V32QImode;
42630 nelt = 32;
42631 target = gen_reg_rtx (vmode);
42632 op0 = gen_lowpart (vmode, op0);
42633 op1 = gen_lowpart (vmode, op1);
42634 goto finish_pblendvb;
42637 /* Use vpblendw. */
42638 for (i = 0; i < 16; ++i)
42639 mask |= (d->perm[i] >= 16) << i;
42640 break;
42643 /* Use vpblendd. */
42644 for (i = 0; i < 8; ++i)
42645 mask |= (d->perm[i * 2] >= 16) << i;
42646 vmode = V8SImode;
42647 goto do_subreg;
42649 case V4DImode:
42650 /* Use vpblendd. */
42651 for (i = 0; i < 4; ++i)
42652 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42653 vmode = V8SImode;
42654 goto do_subreg;
42656 default:
42657 gcc_unreachable ();
42660 /* This matches five different patterns with the different modes. */
42661 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42662 x = gen_rtx_SET (VOIDmode, target, x);
42663 emit_insn (x);
42664 if (target != d->target)
42665 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42667 return true;
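/* Worked example, not part of the original file: a V8HImode blend with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 } keeps every element in its position,
   taking elements 1, 3, 5 and 7 from op1 (index >= nelt), so the loop above
   computes mask = 0xaa and a single pblendw with that immediate suffices.  */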
42670 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42671 in terms of the variable form of vpermilps.
42673 Note that we will have already failed the immediate input vpermilps,
42674 which requires that the high and low part shuffle be identical; the
42675 variable form doesn't require that. */
42677 static bool
42678 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42680 rtx rperm[8], vperm;
42681 unsigned i;
42683 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42684 return false;
42686 /* We can only permute within the 128-bit lane. */
42687 for (i = 0; i < 8; ++i)
42689 unsigned e = d->perm[i];
42690 if (i < 4 ? e >= 4 : e < 4)
42691 return false;
42694 if (d->testing_p)
42695 return true;
42697 for (i = 0; i < 8; ++i)
42699 unsigned e = d->perm[i];
42701 /* Within each 128-bit lane, the elements of op0 are numbered
42702 from 0 and the elements of op1 are numbered from 4. */
42703 if (e >= 8 + 4)
42704 e -= 8;
42705 else if (e >= 4)
42706 e -= 4;
42708 rperm[i] = GEN_INT (e);
42711 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42712 vperm = force_reg (V8SImode, vperm);
42713 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42715 return true;
42718 /* Return true if permutation D can be performed as VMODE permutation
42719 instead. */
42721 static bool
42722 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42724 unsigned int i, j, chunk;
42726 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42727 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42728 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42729 return false;
42731 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42732 return true;
42734 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42735 for (i = 0; i < d->nelt; i += chunk)
42736 if (d->perm[i] & (chunk - 1))
42737 return false;
42738 else
42739 for (j = 1; j < chunk; ++j)
42740 if (d->perm[i] + j != d->perm[i + j])
42741 return false;
42743 return true;
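/* Worked example, not part of the original file: the V16QImode permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } is valid as a V4SImode
   permutation: chunk = 16 / 4 = 4, every group of four starts at a multiple
   of four and is consecutive, so it is equivalent to the V4SImode
   permutation { 1, 0, 3, 2 }.  */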
42746 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42747 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42749 static bool
42750 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42752 unsigned i, nelt, eltsz, mask;
42753 unsigned char perm[32];
42754 enum machine_mode vmode = V16QImode;
42755 rtx rperm[32], vperm, target, op0, op1;
42757 nelt = d->nelt;
42759 if (!d->one_operand_p)
42761 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42763 if (TARGET_AVX2
42764 && valid_perm_using_mode_p (V2TImode, d))
42766 if (d->testing_p)
42767 return true;
42769 /* Use vperm2i128 insn. The pattern uses
42770 V4DImode instead of V2TImode. */
42771 target = d->target;
42772 if (d->vmode != V4DImode)
42773 target = gen_reg_rtx (V4DImode);
42774 op0 = gen_lowpart (V4DImode, d->op0);
42775 op1 = gen_lowpart (V4DImode, d->op1);
42776 rperm[0]
42777 = GEN_INT ((d->perm[0] / (nelt / 2))
42778 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42779 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42780 if (target != d->target)
42781 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42782 return true;
42784 return false;
42787 else
42789 if (GET_MODE_SIZE (d->vmode) == 16)
42791 if (!TARGET_SSSE3)
42792 return false;
42794 else if (GET_MODE_SIZE (d->vmode) == 32)
42796 if (!TARGET_AVX2)
42797 return false;
42799 /* V4DImode should be already handled through
42800 expand_vselect by vpermq instruction. */
42801 gcc_assert (d->vmode != V4DImode);
42803 vmode = V32QImode;
42804 if (d->vmode == V8SImode
42805 || d->vmode == V16HImode
42806 || d->vmode == V32QImode)
42808 /* First see if vpermq can be used for
42809 V8SImode/V16HImode/V32QImode. */
42810 if (valid_perm_using_mode_p (V4DImode, d))
42812 for (i = 0; i < 4; i++)
42813 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42814 if (d->testing_p)
42815 return true;
42816 target = gen_reg_rtx (V4DImode);
42817 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42818 perm, 4, false))
42820 emit_move_insn (d->target,
42821 gen_lowpart (d->vmode, target));
42822 return true;
42824 return false;
42827 /* Next see if vpermd can be used. */
42828 if (valid_perm_using_mode_p (V8SImode, d))
42829 vmode = V8SImode;
42831 /* Or if vpermps can be used. */
42832 else if (d->vmode == V8SFmode)
42833 vmode = V8SImode;
42835 if (vmode == V32QImode)
42837 /* vpshufb only works within 128-bit lanes; it is not
42838 possible to shuffle bytes across lanes. */
42839 for (i = 0; i < nelt; ++i)
42840 if ((d->perm[i] ^ i) & (nelt / 2))
42841 return false;
42844 else
42845 return false;
42848 if (d->testing_p)
42849 return true;
42851 if (vmode == V8SImode)
42852 for (i = 0; i < 8; ++i)
42853 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42854 else
42856 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42857 if (!d->one_operand_p)
42858 mask = 2 * nelt - 1;
42859 else if (vmode == V16QImode)
42860 mask = nelt - 1;
42861 else
42862 mask = nelt / 2 - 1;
42864 for (i = 0; i < nelt; ++i)
42866 unsigned j, e = d->perm[i] & mask;
42867 for (j = 0; j < eltsz; ++j)
42868 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42872 vperm = gen_rtx_CONST_VECTOR (vmode,
42873 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42874 vperm = force_reg (vmode, vperm);
42876 target = d->target;
42877 if (d->vmode != vmode)
42878 target = gen_reg_rtx (vmode);
42879 op0 = gen_lowpart (vmode, d->op0);
42880 if (d->one_operand_p)
42882 if (vmode == V16QImode)
42883 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42884 else if (vmode == V32QImode)
42885 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42886 else if (vmode == V8SFmode)
42887 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42888 else
42889 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42891 else
42893 op1 = gen_lowpart (vmode, d->op1);
42894 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42896 if (target != d->target)
42897 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42899 return true;
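/* Worked example, not part of the original file: if this function handles a
   one-operand V8HImode permutation { 3, 3, 2, 2, 1, 1, 0, 0 }, then
   vmode = V16QImode, eltsz = 2 and mask = nelt - 1 = 7, and the loop above
   builds the pshufb byte selector
   { 6,7, 6,7, 4,5, 4,5, 2,3, 2,3, 0,1, 0,1 }.  */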
42902 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42903 in a single instruction. */
42905 static bool
42906 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42908 unsigned i, nelt = d->nelt;
42909 unsigned char perm2[MAX_VECT_LEN];
42911 /* Check plain VEC_SELECT first, because AVX has instructions that could
42912 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42913 input where SEL+CONCAT may not. */
42914 if (d->one_operand_p)
42916 int mask = nelt - 1;
42917 bool identity_perm = true;
42918 bool broadcast_perm = true;
42920 for (i = 0; i < nelt; i++)
42922 perm2[i] = d->perm[i] & mask;
42923 if (perm2[i] != i)
42924 identity_perm = false;
42925 if (perm2[i])
42926 broadcast_perm = false;
42929 if (identity_perm)
42931 if (!d->testing_p)
42932 emit_move_insn (d->target, d->op0);
42933 return true;
42935 else if (broadcast_perm && TARGET_AVX2)
42937 /* Use vpbroadcast{b,w,d}. */
42938 rtx (*gen) (rtx, rtx) = NULL;
42939 switch (d->vmode)
42941 case V32QImode:
42942 gen = gen_avx2_pbroadcastv32qi_1;
42943 break;
42944 case V16HImode:
42945 gen = gen_avx2_pbroadcastv16hi_1;
42946 break;
42947 case V8SImode:
42948 gen = gen_avx2_pbroadcastv8si_1;
42949 break;
42950 case V16QImode:
42951 gen = gen_avx2_pbroadcastv16qi;
42952 break;
42953 case V8HImode:
42954 gen = gen_avx2_pbroadcastv8hi;
42955 break;
42956 case V8SFmode:
42957 gen = gen_avx2_vec_dupv8sf_1;
42958 break;
42959 /* For other modes prefer other shuffles this function creates. */
42960 default: break;
42962 if (gen != NULL)
42964 if (!d->testing_p)
42965 emit_insn (gen (d->target, d->op0));
42966 return true;
42970 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42971 return true;
42973 /* There are plenty of patterns in sse.md that are written for
42974 SEL+CONCAT and are not replicated for a single op. Perhaps
42975 that should be changed, to avoid the nastiness here. */
42977 /* Recognize interleave style patterns, which means incrementing
42978 every other permutation operand. */
42979 for (i = 0; i < nelt; i += 2)
42981 perm2[i] = d->perm[i] & mask;
42982 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42984 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42985 d->testing_p))
42986 return true;
42988 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42989 if (nelt >= 4)
42991 for (i = 0; i < nelt; i += 4)
42993 perm2[i + 0] = d->perm[i + 0] & mask;
42994 perm2[i + 1] = d->perm[i + 1] & mask;
42995 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
42996 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
42999 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43000 d->testing_p))
43001 return true;
43005 /* Finally, try the fully general two operand permute. */
43006 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43007 d->testing_p))
43008 return true;
43010 /* Recognize interleave style patterns with reversed operands. */
43011 if (!d->one_operand_p)
43013 for (i = 0; i < nelt; ++i)
43015 unsigned e = d->perm[i];
43016 if (e >= nelt)
43017 e -= nelt;
43018 else
43019 e += nelt;
43020 perm2[i] = e;
43023 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43024 d->testing_p))
43025 return true;
43028 /* Try the SSE4.1 blend variable merge instructions. */
43029 if (expand_vec_perm_blend (d))
43030 return true;
43032 /* Try one of the AVX vpermil variable permutations. */
43033 if (expand_vec_perm_vpermil (d))
43034 return true;
43036 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43037 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43038 if (expand_vec_perm_pshufb (d))
43039 return true;
43041 /* Try the AVX512F vpermi2 instructions. */
43042 rtx vec[64];
43043 enum machine_mode mode = d->vmode;
43044 if (mode == V8DFmode)
43045 mode = V8DImode;
43046 else if (mode == V16SFmode)
43047 mode = V16SImode;
43048 for (i = 0; i < nelt; ++i)
43049 vec[i] = GEN_INT (d->perm[i]);
43050 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43051 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43052 return true;
43054 return false;
43057 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43058 in terms of a pair of pshuflw + pshufhw instructions. */
43060 static bool
43061 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43063 unsigned char perm2[MAX_VECT_LEN];
43064 unsigned i;
43065 bool ok;
43067 if (d->vmode != V8HImode || !d->one_operand_p)
43068 return false;
43070 /* The two permutations only operate in 64-bit lanes. */
43071 for (i = 0; i < 4; ++i)
43072 if (d->perm[i] >= 4)
43073 return false;
43074 for (i = 4; i < 8; ++i)
43075 if (d->perm[i] < 4)
43076 return false;
43078 if (d->testing_p)
43079 return true;
43081 /* Emit the pshuflw. */
43082 memcpy (perm2, d->perm, 4);
43083 for (i = 4; i < 8; ++i)
43084 perm2[i] = i;
43085 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43086 gcc_assert (ok);
43088 /* Emit the pshufhw. */
43089 memcpy (perm2 + 4, d->perm + 4, 4);
43090 for (i = 0; i < 4; ++i)
43091 perm2[i] = i;
43092 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43093 gcc_assert (ok);
43095 return true;
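/* Worked example, not part of the original file: for the V8HImode
   permutation { 2, 0, 3, 1, 7, 5, 6, 4 } the low and high indices stay in
   their 64-bit halves, so the code above emits pshuflw as
   { 2, 0, 3, 1, 4, 5, 6, 7 } followed by pshufhw as
   { 0, 1, 2, 3, 7, 5, 6, 4 }.  */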
43098 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43099 the permutation using the SSSE3 palignr instruction. This succeeds
43100 when all of the elements in PERM fit within one vector and we merely
43101 need to shift them down so that a single vector permutation has a
43102 chance to succeed. */
43104 static bool
43105 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43107 unsigned i, nelt = d->nelt;
43108 unsigned min, max;
43109 bool in_order, ok;
43110 rtx shift, target;
43111 struct expand_vec_perm_d dcopy;
43113 /* Even with AVX, palignr only operates on 128-bit vectors. */
43114 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43115 return false;
43117 min = nelt, max = 0;
43118 for (i = 0; i < nelt; ++i)
43120 unsigned e = d->perm[i];
43121 if (e < min)
43122 min = e;
43123 if (e > max)
43124 max = e;
43126 if (min == 0 || max - min >= nelt)
43127 return false;
43129 /* Given that we have SSSE3, we know we'll be able to implement the
43130 single operand permutation after the palignr with pshufb. */
43131 if (d->testing_p)
43132 return true;
43134 dcopy = *d;
43135 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43136 target = gen_reg_rtx (TImode);
43137 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43138 gen_lowpart (TImode, d->op0), shift));
43140 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43141 dcopy.one_operand_p = true;
43143 in_order = true;
43144 for (i = 0; i < nelt; ++i)
43146 unsigned e = dcopy.perm[i] - min;
43147 if (e != i)
43148 in_order = false;
43149 dcopy.perm[i] = e;
43152 /* Test for the degenerate case where the alignment by itself
43153 produces the desired permutation. */
43154 if (in_order)
43156 emit_move_insn (d->target, dcopy.op0);
43157 return true;
43160 ok = expand_vec_perm_1 (&dcopy);
43161 gcc_assert (ok);
43163 return ok;
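/* Worked example, not part of the original file: a V16QImode permutation
   with perm[i] = i + 5 selects bytes 5 .. 20 of the op1:op0 concatenation.
   Here min = 5 and max = 20, so palignr shifts the pair down by 5 bytes;
   the remaining permutation is the identity and the aligned result is
   simply copied to the target.  */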
43166 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43168 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43169 a two vector permutation into a single vector permutation by using
43170 an interleave operation to merge the vectors. */
43172 static bool
43173 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43175 struct expand_vec_perm_d dremap, dfinal;
43176 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43177 unsigned HOST_WIDE_INT contents;
43178 unsigned char remap[2 * MAX_VECT_LEN];
43179 rtx seq;
43180 bool ok, same_halves = false;
43182 if (GET_MODE_SIZE (d->vmode) == 16)
43184 if (d->one_operand_p)
43185 return false;
43187 else if (GET_MODE_SIZE (d->vmode) == 32)
43189 if (!TARGET_AVX)
43190 return false;
43191 /* For 32-byte modes allow even d->one_operand_p.
43192 The lack of cross-lane shuffling in some instructions
43193 might prevent a single insn shuffle. */
43194 dfinal = *d;
43195 dfinal.testing_p = true;
43196 /* If expand_vec_perm_interleave3 can expand this into
43197 a 3 insn sequence, give up and let it be expanded that
43198 way. While that is one insn longer, it doesn't need a
43199 memory operand, and in the common case where both the
43200 interleave low and interleave high permutations with the
43201 same operands are adjacent, the pair needs only 4 insns
43202 after CSE. */
43203 if (expand_vec_perm_interleave3 (&dfinal))
43204 return false;
43206 else
43207 return false;
43209 /* Examine from whence the elements come. */
43210 contents = 0;
43211 for (i = 0; i < nelt; ++i)
43212 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43214 memset (remap, 0xff, sizeof (remap));
43215 dremap = *d;
43217 if (GET_MODE_SIZE (d->vmode) == 16)
43219 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43221 /* Split the two input vectors into 4 halves. */
43222 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43223 h2 = h1 << nelt2;
43224 h3 = h2 << nelt2;
43225 h4 = h3 << nelt2;
43227 /* If the elements are all from the low halves, use interleave low;
43228 similarly for interleave high. If the elements are from mis-matched
43229 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43230 if ((contents & (h1 | h3)) == contents)
43232 /* punpckl* */
43233 for (i = 0; i < nelt2; ++i)
43235 remap[i] = i * 2;
43236 remap[i + nelt] = i * 2 + 1;
43237 dremap.perm[i * 2] = i;
43238 dremap.perm[i * 2 + 1] = i + nelt;
43240 if (!TARGET_SSE2 && d->vmode == V4SImode)
43241 dremap.vmode = V4SFmode;
43243 else if ((contents & (h2 | h4)) == contents)
43245 /* punpckh* */
43246 for (i = 0; i < nelt2; ++i)
43248 remap[i + nelt2] = i * 2;
43249 remap[i + nelt + nelt2] = i * 2 + 1;
43250 dremap.perm[i * 2] = i + nelt2;
43251 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43253 if (!TARGET_SSE2 && d->vmode == V4SImode)
43254 dremap.vmode = V4SFmode;
43256 else if ((contents & (h1 | h4)) == contents)
43258 /* shufps */
43259 for (i = 0; i < nelt2; ++i)
43261 remap[i] = i;
43262 remap[i + nelt + nelt2] = i + nelt2;
43263 dremap.perm[i] = i;
43264 dremap.perm[i + nelt2] = i + nelt + nelt2;
43266 if (nelt != 4)
43268 /* shufpd */
43269 dremap.vmode = V2DImode;
43270 dremap.nelt = 2;
43271 dremap.perm[0] = 0;
43272 dremap.perm[1] = 3;
43275 else if ((contents & (h2 | h3)) == contents)
43277 /* shufps */
43278 for (i = 0; i < nelt2; ++i)
43280 remap[i + nelt2] = i;
43281 remap[i + nelt] = i + nelt2;
43282 dremap.perm[i] = i + nelt2;
43283 dremap.perm[i + nelt2] = i + nelt;
43285 if (nelt != 4)
43287 /* shufpd */
43288 dremap.vmode = V2DImode;
43289 dremap.nelt = 2;
43290 dremap.perm[0] = 1;
43291 dremap.perm[1] = 2;
43294 else
43295 return false;
43297 else
43299 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43300 unsigned HOST_WIDE_INT q[8];
43301 unsigned int nonzero_halves[4];
43303 /* Split the two input vectors into 8 quarters. */
43304 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43305 for (i = 1; i < 8; ++i)
43306 q[i] = q[0] << (nelt4 * i);
43307 for (i = 0; i < 4; ++i)
43308 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43310 nonzero_halves[nzcnt] = i;
43311 ++nzcnt;
43314 if (nzcnt == 1)
43316 gcc_assert (d->one_operand_p);
43317 nonzero_halves[1] = nonzero_halves[0];
43318 same_halves = true;
43320 else if (d->one_operand_p)
43322 gcc_assert (nonzero_halves[0] == 0);
43323 gcc_assert (nonzero_halves[1] == 1);
43326 if (nzcnt <= 2)
43328 if (d->perm[0] / nelt2 == nonzero_halves[1])
43330 /* Attempt to increase the likelihood that dfinal
43331 shuffle will be intra-lane. */
43332 char tmph = nonzero_halves[0];
43333 nonzero_halves[0] = nonzero_halves[1];
43334 nonzero_halves[1] = tmph;
43337 /* vperm2f128 or vperm2i128. */
43338 for (i = 0; i < nelt2; ++i)
43340 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43341 remap[i + nonzero_halves[0] * nelt2] = i;
43342 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43343 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43346 if (d->vmode != V8SFmode
43347 && d->vmode != V4DFmode
43348 && d->vmode != V8SImode)
43350 dremap.vmode = V8SImode;
43351 dremap.nelt = 8;
43352 for (i = 0; i < 4; ++i)
43354 dremap.perm[i] = i + nonzero_halves[0] * 4;
43355 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43359 else if (d->one_operand_p)
43360 return false;
43361 else if (TARGET_AVX2
43362 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43364 /* vpunpckl* */
43365 for (i = 0; i < nelt4; ++i)
43367 remap[i] = i * 2;
43368 remap[i + nelt] = i * 2 + 1;
43369 remap[i + nelt2] = i * 2 + nelt2;
43370 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43371 dremap.perm[i * 2] = i;
43372 dremap.perm[i * 2 + 1] = i + nelt;
43373 dremap.perm[i * 2 + nelt2] = i + nelt2;
43374 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43377 else if (TARGET_AVX2
43378 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43380 /* vpunpckh* */
43381 for (i = 0; i < nelt4; ++i)
43383 remap[i + nelt4] = i * 2;
43384 remap[i + nelt + nelt4] = i * 2 + 1;
43385 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43386 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43387 dremap.perm[i * 2] = i + nelt4;
43388 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43389 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43390 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43393 else
43394 return false;
43397 /* Use the remapping array set up above to move the elements from their
43398 swizzled locations into their final destinations. */
43399 dfinal = *d;
43400 for (i = 0; i < nelt; ++i)
43402 unsigned e = remap[d->perm[i]];
43403 gcc_assert (e < nelt);
43404 /* If same_halves is true, both halves of the remapped vector are the
43405 same. Avoid cross-lane accesses if possible. */
43406 if (same_halves && i >= nelt2)
43408 gcc_assert (e < nelt2);
43409 dfinal.perm[i] = e + nelt2;
43411 else
43412 dfinal.perm[i] = e;
43414 dremap.target = gen_reg_rtx (dremap.vmode);
43415 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43416 dfinal.op1 = dfinal.op0;
43417 dfinal.one_operand_p = true;
43419 /* Test if the final remap can be done with a single insn. For V4SFmode or
43420 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43421 start_sequence ();
43422 ok = expand_vec_perm_1 (&dfinal);
43423 seq = get_insns ();
43424 end_sequence ();
43426 if (!ok)
43427 return false;
43429 if (d->testing_p)
43430 return true;
43432 if (dremap.vmode != dfinal.vmode)
43434 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43435 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43438 ok = expand_vec_perm_1 (&dremap);
43439 gcc_assert (ok);
43441 emit_insn (seq);
43442 return true;
43445 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43446 a single vector cross-lane permutation into vpermq followed
43447 by any of the single insn permutations. */
43449 static bool
43450 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43452 struct expand_vec_perm_d dremap, dfinal;
43453 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43454 unsigned contents[2];
43455 bool ok;
43457 if (!(TARGET_AVX2
43458 && (d->vmode == V32QImode || d->vmode == V16HImode)
43459 && d->one_operand_p))
43460 return false;
43462 contents[0] = 0;
43463 contents[1] = 0;
43464 for (i = 0; i < nelt2; ++i)
43466 contents[0] |= 1u << (d->perm[i] / nelt4);
43467 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43470 for (i = 0; i < 2; ++i)
43472 unsigned int cnt = 0;
43473 for (j = 0; j < 4; ++j)
43474 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43475 return false;
43478 if (d->testing_p)
43479 return true;
43481 dremap = *d;
43482 dremap.vmode = V4DImode;
43483 dremap.nelt = 4;
43484 dremap.target = gen_reg_rtx (V4DImode);
43485 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43486 dremap.op1 = dremap.op0;
43487 dremap.one_operand_p = true;
43488 for (i = 0; i < 2; ++i)
43490 unsigned int cnt = 0;
43491 for (j = 0; j < 4; ++j)
43492 if ((contents[i] & (1u << j)) != 0)
43493 dremap.perm[2 * i + cnt++] = j;
43494 for (; cnt < 2; ++cnt)
43495 dremap.perm[2 * i + cnt] = 0;
43498 dfinal = *d;
43499 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43500 dfinal.op1 = dfinal.op0;
43501 dfinal.one_operand_p = true;
43502 for (i = 0, j = 0; i < nelt; ++i)
43504 if (i == nelt2)
43505 j = 2;
43506 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43507 if ((d->perm[i] / nelt4) == dremap.perm[j])
43509 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43510 dfinal.perm[i] |= nelt4;
43511 else
43512 gcc_unreachable ();
43515 ok = expand_vec_perm_1 (&dremap);
43516 gcc_assert (ok);
43518 ok = expand_vec_perm_1 (&dfinal);
43519 gcc_assert (ok);
43521 return true;
43524 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43525 a vector permutation using two instructions, vperm2f128 resp.
43526 vperm2i128 followed by any single in-lane permutation. */
43528 static bool
43529 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43531 struct expand_vec_perm_d dfirst, dsecond;
43532 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43533 bool ok;
43535 if (!TARGET_AVX
43536 || GET_MODE_SIZE (d->vmode) != 32
43537 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43538 return false;
43540 dsecond = *d;
43541 dsecond.one_operand_p = false;
43542 dsecond.testing_p = true;
43544 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43545 immediate. For perm < 16 the second permutation uses
43546 d->op0 as first operand, for perm >= 16 it uses d->op1
43547 as first operand. The second operand is the result of
43548 vperm2[fi]128. */
43549 for (perm = 0; perm < 32; perm++)
43551 /* Ignore permutations which do not move anything cross-lane. */
43552 if (perm < 16)
43554 /* The second shuffle for e.g. V4DFmode has
43555 0123 and ABCD operands.
43556 Ignore AB23, as 23 is already in the second lane
43557 of the first operand. */
43558 if ((perm & 0xc) == (1 << 2)) continue;
43559 /* And 01CD, as 01 is in the first lane of the first
43560 operand. */
43561 if ((perm & 3) == 0) continue;
43562 /* And 4567, as then the vperm2[fi]128 doesn't change
43563 anything on the original 4567 second operand. */
43564 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43566 else
43568 /* The second shuffle for e.g. V4DFmode has
43569 4567 and ABCD operands.
43570 Ignore AB67, as 67 is already in the second lane
43571 of the first operand. */
43572 if ((perm & 0xc) == (3 << 2)) continue;
43573 /* And 45CD, as 45 is in the first lane of the first
43574 operand. */
43575 if ((perm & 3) == 2) continue;
43576 /* And 0123, as then the vperm2[fi]128 doesn't change
43577 anything on the original 0123 first operand. */
43578 if ((perm & 0xf) == (1 << 2)) continue;
43581 for (i = 0; i < nelt; i++)
43583 j = d->perm[i] / nelt2;
43584 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43585 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43586 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43587 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43588 else
43589 break;
43592 if (i == nelt)
43594 start_sequence ();
43595 ok = expand_vec_perm_1 (&dsecond);
43596 end_sequence ();
43598 else
43599 ok = false;
43601 if (ok)
43603 if (d->testing_p)
43604 return true;
43606 /* Found a usable second shuffle. dfirst will be
43607 vperm2f128 on d->op0 and d->op1. */
43608 dsecond.testing_p = false;
43609 dfirst = *d;
43610 dfirst.target = gen_reg_rtx (d->vmode);
43611 for (i = 0; i < nelt; i++)
43612 dfirst.perm[i] = (i & (nelt2 - 1))
43613 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43615 ok = expand_vec_perm_1 (&dfirst);
43616 gcc_assert (ok);
43618 /* And dsecond is some single insn shuffle, taking
43619 d->op0 and result of vperm2f128 (if perm < 16) or
43620 d->op1 and result of vperm2f128 (otherwise). */
43621 dsecond.op1 = dfirst.target;
43622 if (perm >= 16)
43623 dsecond.op0 = dfirst.op1;
43625 ok = expand_vec_perm_1 (&dsecond);
43626 gcc_assert (ok);
43628 return true;
43631 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43632 if (d->one_operand_p)
43633 return false;
43636 return false;
43639 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43640 a two vector permutation using 2 intra-lane interleave insns
43641 and cross-lane shuffle for 32-byte vectors. */
43643 static bool
43644 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43646 unsigned i, nelt;
43647 rtx (*gen) (rtx, rtx, rtx);
43649 if (d->one_operand_p)
43650 return false;
43651 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43653 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43655 else
43656 return false;
43658 nelt = d->nelt;
43659 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43660 return false;
43661 for (i = 0; i < nelt; i += 2)
43662 if (d->perm[i] != d->perm[0] + i / 2
43663 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43664 return false;
43666 if (d->testing_p)
43667 return true;
43669 switch (d->vmode)
43671 case V32QImode:
43672 if (d->perm[0])
43673 gen = gen_vec_interleave_highv32qi;
43674 else
43675 gen = gen_vec_interleave_lowv32qi;
43676 break;
43677 case V16HImode:
43678 if (d->perm[0])
43679 gen = gen_vec_interleave_highv16hi;
43680 else
43681 gen = gen_vec_interleave_lowv16hi;
43682 break;
43683 case V8SImode:
43684 if (d->perm[0])
43685 gen = gen_vec_interleave_highv8si;
43686 else
43687 gen = gen_vec_interleave_lowv8si;
43688 break;
43689 case V4DImode:
43690 if (d->perm[0])
43691 gen = gen_vec_interleave_highv4di;
43692 else
43693 gen = gen_vec_interleave_lowv4di;
43694 break;
43695 case V8SFmode:
43696 if (d->perm[0])
43697 gen = gen_vec_interleave_highv8sf;
43698 else
43699 gen = gen_vec_interleave_lowv8sf;
43700 break;
43701 case V4DFmode:
43702 if (d->perm[0])
43703 gen = gen_vec_interleave_highv4df;
43704 else
43705 gen = gen_vec_interleave_lowv4df;
43706 break;
43707 default:
43708 gcc_unreachable ();
43711 emit_insn (gen (d->target, d->op0, d->op1));
43712 return true;
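/* Worked example, not part of the original file: for V8SImode the
   permutation { 0, 8, 1, 9, 2, 10, 3, 11 } has perm[0] == 0 and passes the
   check above, so it is emitted via gen_vec_interleave_lowv8si, while
   { 4, 12, 5, 13, 6, 14, 7, 15 } has perm[0] == nelt / 2 and becomes
   gen_vec_interleave_highv8si.  */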
43715 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43716 a single vector permutation using a single intra-lane vector
43717 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43718 the non-swapped and swapped vectors together. */
43720 static bool
43721 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43723 struct expand_vec_perm_d dfirst, dsecond;
43724 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43725 rtx seq;
43726 bool ok;
43727 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43729 if (!TARGET_AVX
43730 || TARGET_AVX2
43731 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43732 || !d->one_operand_p)
43733 return false;
43735 dfirst = *d;
43736 for (i = 0; i < nelt; i++)
43737 dfirst.perm[i] = 0xff;
43738 for (i = 0, msk = 0; i < nelt; i++)
43740 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43741 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43742 return false;
43743 dfirst.perm[j] = d->perm[i];
43744 if (j != i)
43745 msk |= (1 << i);
43747 for (i = 0; i < nelt; i++)
43748 if (dfirst.perm[i] == 0xff)
43749 dfirst.perm[i] = i;
43751 if (!d->testing_p)
43752 dfirst.target = gen_reg_rtx (dfirst.vmode);
43754 start_sequence ();
43755 ok = expand_vec_perm_1 (&dfirst);
43756 seq = get_insns ();
43757 end_sequence ();
43759 if (!ok)
43760 return false;
43762 if (d->testing_p)
43763 return true;
43765 emit_insn (seq);
43767 dsecond = *d;
43768 dsecond.op0 = dfirst.target;
43769 dsecond.op1 = dfirst.target;
43770 dsecond.one_operand_p = true;
43771 dsecond.target = gen_reg_rtx (dsecond.vmode);
43772 for (i = 0; i < nelt; i++)
43773 dsecond.perm[i] = i ^ nelt2;
43775 ok = expand_vec_perm_1 (&dsecond);
43776 gcc_assert (ok);
43778 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43779 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43780 return true;
43783 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43784 permutation using two vperm2f128, followed by a vshufpd insn blending
43785 the two vectors together. */
43787 static bool
43788 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43790 struct expand_vec_perm_d dfirst, dsecond, dthird;
43791 bool ok;
43793 if (!TARGET_AVX || (d->vmode != V4DFmode))
43794 return false;
43796 if (d->testing_p)
43797 return true;
43799 dfirst = *d;
43800 dsecond = *d;
43801 dthird = *d;
43803 dfirst.perm[0] = (d->perm[0] & ~1);
43804 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43805 dfirst.perm[2] = (d->perm[2] & ~1);
43806 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43807 dsecond.perm[0] = (d->perm[1] & ~1);
43808 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43809 dsecond.perm[2] = (d->perm[3] & ~1);
43810 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43811 dthird.perm[0] = (d->perm[0] % 2);
43812 dthird.perm[1] = (d->perm[1] % 2) + 4;
43813 dthird.perm[2] = (d->perm[2] % 2) + 2;
43814 dthird.perm[3] = (d->perm[3] % 2) + 6;
43816 dfirst.target = gen_reg_rtx (dfirst.vmode);
43817 dsecond.target = gen_reg_rtx (dsecond.vmode);
43818 dthird.op0 = dfirst.target;
43819 dthird.op1 = dsecond.target;
43820 dthird.one_operand_p = false;
43822 canonicalize_perm (&dfirst);
43823 canonicalize_perm (&dsecond);
43825 ok = expand_vec_perm_1 (&dfirst)
43826 && expand_vec_perm_1 (&dsecond)
43827 && expand_vec_perm_1 (&dthird);
43829 gcc_assert (ok);
43831 return true;
43834 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43835 permutation with two pshufb insns and an ior. We should have already
43836 failed all two instruction sequences. */
43838 static bool
43839 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43841 rtx rperm[2][16], vperm, l, h, op, m128;
43842 unsigned int i, nelt, eltsz;
43844 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43845 return false;
43846 gcc_assert (!d->one_operand_p);
43848 nelt = d->nelt;
43849 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43851 /* Generate two permutation masks. If the required element is within
43852 the given vector it is shuffled into the proper lane. If the required
43853 element is in the other vector, force a zero into the lane by setting
43854 bit 7 in the permutation mask. */
43855 m128 = GEN_INT (-128);
43856 for (i = 0; i < nelt; ++i)
43858 unsigned j, e = d->perm[i];
43859 unsigned which = (e >= nelt);
43860 if (e >= nelt)
43861 e -= nelt;
43863 for (j = 0; j < eltsz; ++j)
43865 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43866 rperm[1-which][i*eltsz + j] = m128;
43870 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43871 vperm = force_reg (V16QImode, vperm);
43873 l = gen_reg_rtx (V16QImode);
43874 op = gen_lowpart (V16QImode, d->op0);
43875 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43877 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43878 vperm = force_reg (V16QImode, vperm);
43880 h = gen_reg_rtx (V16QImode);
43881 op = gen_lowpart (V16QImode, d->op1);
43882 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43884 op = d->target;
43885 if (d->vmode != V16QImode)
43886 op = gen_reg_rtx (V16QImode);
43887 emit_insn (gen_iorv16qi3 (op, l, h));
43888 if (op != d->target)
43889 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43891 return true;
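/* Worked example, not part of the original file: extracting the even bytes
   of two V16QImode operands, perm = { 0, 2, ..., 30 }, yields the masks
   { 0, 2, ..., 14, -128 x 8 } for op0 and { -128 x 8, 0, 2, ..., 14 } for
   op1; one pshufb per operand followed by the ior above produces the
   result.  */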
43894 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
43895 with two vpshufb insns, vpermq and vpor. We should have already failed
43896 all two or three instruction sequences. */
43898 static bool
43899 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43901 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43902 unsigned int i, nelt, eltsz;
43904 if (!TARGET_AVX2
43905 || !d->one_operand_p
43906 || (d->vmode != V32QImode && d->vmode != V16HImode))
43907 return false;
43909 if (d->testing_p)
43910 return true;
43912 nelt = d->nelt;
43913 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43915 /* Generate two permutation masks. If the required element is within
43916 the same lane, it is shuffled in. If the required element is from the
43917 other lane, force a zero by setting bit 7 in the permutation mask.
43918 In the other mask, an element is non-negative if it is requested
43919 from the other lane, but it is also moved to the other lane,
43920 so that the result of vpshufb can have the two V2TImode halves
43921 swapped. */
43922 m128 = GEN_INT (-128);
43923 for (i = 0; i < nelt; ++i)
43925 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43926 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43928 for (j = 0; j < eltsz; ++j)
43930 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43931 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43935 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43936 vperm = force_reg (V32QImode, vperm);
43938 h = gen_reg_rtx (V32QImode);
43939 op = gen_lowpart (V32QImode, d->op0);
43940 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43942 /* Swap the 128-bit lanes of h into hp. */
43943 hp = gen_reg_rtx (V4DImode);
43944 op = gen_lowpart (V4DImode, h);
43945 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43946 const1_rtx));
43948 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43949 vperm = force_reg (V32QImode, vperm);
43951 l = gen_reg_rtx (V32QImode);
43952 op = gen_lowpart (V32QImode, d->op0);
43953 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43955 op = d->target;
43956 if (d->vmode != V32QImode)
43957 op = gen_reg_rtx (V32QImode);
43958 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43959 if (op != d->target)
43960 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43962 return true;
43965 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43966 and extract-odd permutations of two V32QImode and V16QImode operand
43967 with two vpshufb insns, vpor and vpermq. We should have already
43968 failed all two or three instruction sequences. */
43970 static bool
43971 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43973 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43974 unsigned int i, nelt, eltsz;
43976 if (!TARGET_AVX2
43977 || d->one_operand_p
43978 || (d->vmode != V32QImode && d->vmode != V16HImode))
43979 return false;
43981 for (i = 0; i < d->nelt; ++i)
43982 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43983 return false;
43985 if (d->testing_p)
43986 return true;
43988 nelt = d->nelt;
43989 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43991 /* Generate two permutation masks. In the first permutation mask
43992 the first quarter will contain indexes for the first half
43993 of the op0, the second quarter will contain bit 7 set, third quarter
43994 will contain indexes for the second half of the op0 and the
43995 last quarter bit 7 set. In the second permutation mask
43996 the first quarter will contain bit 7 set, the second quarter
43997 indexes for the first half of the op1, the third quarter bit 7 set
43998 and last quarter indexes for the second half of the op1.
43999 I.e. the first mask e.g. for V32QImode extract even will be:
44000 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44001 (all values masked with 0xf except for -128) and second mask
44002 for extract even will be
44003 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44004 m128 = GEN_INT (-128);
44005 for (i = 0; i < nelt; ++i)
44007 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44008 unsigned which = d->perm[i] >= nelt;
44009 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44011 for (j = 0; j < eltsz; ++j)
44013 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44014 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44018 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44019 vperm = force_reg (V32QImode, vperm);
44021 l = gen_reg_rtx (V32QImode);
44022 op = gen_lowpart (V32QImode, d->op0);
44023 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44025 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44026 vperm = force_reg (V32QImode, vperm);
44028 h = gen_reg_rtx (V32QImode);
44029 op = gen_lowpart (V32QImode, d->op1);
44030 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44032 ior = gen_reg_rtx (V32QImode);
44033 emit_insn (gen_iorv32qi3 (ior, l, h));
44035 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44036 op = gen_reg_rtx (V4DImode);
44037 ior = gen_lowpart (V4DImode, ior);
44038 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44039 const1_rtx, GEN_INT (3)));
44040 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44042 return true;
44045 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44046 and extract-odd permutations. */
44048 static bool
44049 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44051 rtx t1, t2, t3, t4, t5;
44053 switch (d->vmode)
44055 case V4DFmode:
44056 t1 = gen_reg_rtx (V4DFmode);
44057 t2 = gen_reg_rtx (V4DFmode);
44059 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44060 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44061 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44063 /* Now an unpck[lh]pd will produce the result required. */
44064 if (odd)
44065 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44066 else
44067 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44068 emit_insn (t3);
44069 break;
44071 case V8SFmode:
44073 int mask = odd ? 0xdd : 0x88;
44075 t1 = gen_reg_rtx (V8SFmode);
44076 t2 = gen_reg_rtx (V8SFmode);
44077 t3 = gen_reg_rtx (V8SFmode);
44079 /* Shuffle within the 128-bit lanes to produce:
44080 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44081 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44082 GEN_INT (mask)));
44084 /* Shuffle the lanes around to produce:
44085 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44086 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44087 GEN_INT (0x3)));
44089 /* Shuffle within the 128-bit lanes to produce:
44090 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44091 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44093 /* Shuffle within the 128-bit lanes to produce:
44094 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44095 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44097 /* Shuffle the lanes around to produce:
44098 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44099 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44100 GEN_INT (0x20)));
44102 break;
44104 case V2DFmode:
44105 case V4SFmode:
44106 case V2DImode:
44107 case V4SImode:
44108 /* These are always directly implementable by expand_vec_perm_1. */
44109 gcc_unreachable ();
44111 case V8HImode:
44112 if (TARGET_SSSE3)
44113 return expand_vec_perm_pshufb2 (d);
44114 else
44116 /* We need 2*log2(N)-1 operations to achieve odd/even
44117 with interleave. */
44118 t1 = gen_reg_rtx (V8HImode);
44119 t2 = gen_reg_rtx (V8HImode);
44120 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44121 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44122 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44123 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44124 if (odd)
44125 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44126 else
44127 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44128 emit_insn (t3);
44130 break;
44132 case V16QImode:
44133 if (TARGET_SSSE3)
44134 return expand_vec_perm_pshufb2 (d);
44135 else
44137 t1 = gen_reg_rtx (V16QImode);
44138 t2 = gen_reg_rtx (V16QImode);
44139 t3 = gen_reg_rtx (V16QImode);
44140 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44141 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44142 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44143 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44144 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44145 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44146 if (odd)
44147 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44148 else
44149 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44150 emit_insn (t3);
44152 break;
44154 case V16HImode:
44155 case V32QImode:
44156 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44158 case V4DImode:
44159 if (!TARGET_AVX2)
44161 struct expand_vec_perm_d d_copy = *d;
44162 d_copy.vmode = V4DFmode;
44163 d_copy.target = gen_reg_rtx (V4DFmode);
44164 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44165 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44166 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44168 if (!d->testing_p)
44169 emit_move_insn (d->target,
44170 gen_lowpart (V4DImode, d_copy.target));
44171 return true;
44173 return false;
44176 t1 = gen_reg_rtx (V4DImode);
44177 t2 = gen_reg_rtx (V4DImode);
44179 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44180 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44181 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44183 /* Now a vpunpck[lh]qdq will produce the result required. */
44184 if (odd)
44185 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44186 else
44187 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44188 emit_insn (t3);
44189 break;
44191 case V8SImode:
44192 if (!TARGET_AVX2)
44194 struct expand_vec_perm_d d_copy = *d;
44195 d_copy.vmode = V8SFmode;
44196 d_copy.target = gen_reg_rtx (V8SFmode);
44197 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44198 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44199 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44201 if (!d->testing_p)
44202 emit_move_insn (d->target,
44203 gen_lowpart (V8SImode, d_copy.target));
44204 return true;
44206 return false;
44209 t1 = gen_reg_rtx (V8SImode);
44210 t2 = gen_reg_rtx (V8SImode);
44211 t3 = gen_reg_rtx (V4DImode);
44212 t4 = gen_reg_rtx (V4DImode);
44213 t5 = gen_reg_rtx (V4DImode);
44215 /* Shuffle the lanes around into
44216 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44217 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44218 gen_lowpart (V4DImode, d->op1),
44219 GEN_INT (0x20)));
44220 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44221 gen_lowpart (V4DImode, d->op1),
44222 GEN_INT (0x31)));
44224 /* Swap the 2nd and 3rd position in each lane into
44225 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44226 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44227 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44228 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44229 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44231 /* Now a vpunpck[lh]qdq will produce
44232 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44233 if (odd)
44234 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44235 gen_lowpart (V4DImode, t2));
44236 else
44237 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44238 gen_lowpart (V4DImode, t2));
44239 emit_insn (t3);
44240 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44241 break;
44243 default:
44244 gcc_unreachable ();
44247 return true;
44250 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44251 extract-even and extract-odd permutations. */
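/* E.g. for V8SImode the extract-even selector is { 0 2 4 6 8 10 12 14 }
   and the extract-odd selector is { 1 3 5 7 9 11 13 15 }. */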
44253 static bool
44254 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44256 unsigned i, odd, nelt = d->nelt;
44258 odd = d->perm[0];
44259 if (odd != 0 && odd != 1)
44260 return false;
44262 for (i = 1; i < nelt; ++i)
44263 if (d->perm[i] != 2 * i + odd)
44264 return false;
44266 return expand_vec_perm_even_odd_1 (d, odd);
44269 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44270 permutations. We assume that expand_vec_perm_1 has already failed. */
44272 static bool
44273 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44275 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44276 enum machine_mode vmode = d->vmode;
44277 unsigned char perm2[4];
44278 rtx op0 = d->op0, dest;
44279 bool ok;
44281 switch (vmode)
44283 case V4DFmode:
44284 case V8SFmode:
44285 /* These are special-cased in sse.md so that we can optionally
44286 use the vbroadcast instruction. They expand to two insns
44287 if the input happens to be in a register. */
44288 gcc_unreachable ();
44290 case V2DFmode:
44291 case V2DImode:
44292 case V4SFmode:
44293 case V4SImode:
44294 /* These are always implementable using standard shuffle patterns. */
44295 gcc_unreachable ();
44297 case V8HImode:
44298 case V16QImode:
44299 /* These can be implemented via interleave. We save one insn by
44300 stopping once we have promoted to V4SImode and then using pshufd. */
44303 rtx dest;
44304 rtx (*gen) (rtx, rtx, rtx)
44305 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44306 : gen_vec_interleave_lowv8hi;
44308 if (elt >= nelt2)
44310 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44311 : gen_vec_interleave_highv8hi;
44312 elt -= nelt2;
44314 nelt2 /= 2;
44316 dest = gen_reg_rtx (vmode);
44317 emit_insn (gen (dest, op0, op0));
44318 vmode = get_mode_wider_vector (vmode);
44319 op0 = gen_lowpart (vmode, dest);
44321 while (vmode != V4SImode);
44323 memset (perm2, elt, 4);
44324 dest = gen_reg_rtx (V4SImode);
44325 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44326 gcc_assert (ok);
44327 if (!d->testing_p)
44328 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44329 return true;
44331 case V32QImode:
44332 case V16HImode:
44333 case V8SImode:
44334 case V4DImode:
44335 /* For AVX2 broadcasts of the first element vpbroadcast* or
44336 vpermq should be used by expand_vec_perm_1. */
44337 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44338 return false;
44340 default:
44341 gcc_unreachable ();
44345 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44346 broadcast permutations. */
44348 static bool
44349 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44351 unsigned i, elt, nelt = d->nelt;
44353 if (!d->one_operand_p)
44354 return false;
44356 elt = d->perm[0];
44357 for (i = 1; i < nelt; ++i)
44358 if (d->perm[i] != elt)
44359 return false;
44361 return expand_vec_perm_broadcast_1 (d);
44364 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44365 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44366 all the shorter instruction sequences. */
44368 static bool
44369 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44371 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44372 unsigned int i, nelt, eltsz;
44373 bool used[4];
44375 if (!TARGET_AVX2
44376 || d->one_operand_p
44377 || (d->vmode != V32QImode && d->vmode != V16HImode))
44378 return false;
44380 if (d->testing_p)
44381 return true;
44383 nelt = d->nelt;
44384 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44386 /* Generate 4 permutation masks. If the required element is within
44387 the same lane, it is shuffled in. If the required element is from the
44388 other lane, force a zero by setting bit 7 in the permutation mask.
44389 The other mask has non-negative elements when the element is
44390 requested from the other lane, but it is also moved to the other lane,
44391 so that the result of vpshufb can have the two V2TImode halves
44392 swapped. */
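/* rperm[0] and rperm[1] are the same-lane and cross-lane masks for op0;
   rperm[2] and rperm[3] are the corresponding masks for op1. */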
44393 m128 = GEN_INT (-128);
44394 for (i = 0; i < 32; ++i)
44396 rperm[0][i] = m128;
44397 rperm[1][i] = m128;
44398 rperm[2][i] = m128;
44399 rperm[3][i] = m128;
44401 used[0] = false;
44402 used[1] = false;
44403 used[2] = false;
44404 used[3] = false;
44405 for (i = 0; i < nelt; ++i)
44407 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44408 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44409 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44411 for (j = 0; j < eltsz; ++j)
44412 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44413 used[which] = true;
44416 for (i = 0; i < 2; ++i)
44418 if (!used[2 * i + 1])
44420 h[i] = NULL_RTX;
44421 continue;
44423 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44424 gen_rtvec_v (32, rperm[2 * i + 1]));
44425 vperm = force_reg (V32QImode, vperm);
44426 h[i] = gen_reg_rtx (V32QImode);
44427 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44428 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44431 /* Swap the 128-bit lanes of h[X]. */
44432 for (i = 0; i < 2; ++i)
44434 if (h[i] == NULL_RTX)
44435 continue;
44436 op = gen_reg_rtx (V4DImode);
44437 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44438 const2_rtx, GEN_INT (3), const0_rtx,
44439 const1_rtx));
44440 h[i] = gen_lowpart (V32QImode, op);
44443 for (i = 0; i < 2; ++i)
44445 if (!used[2 * i])
44447 l[i] = NULL_RTX;
44448 continue;
44450 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44451 vperm = force_reg (V32QImode, vperm);
44452 l[i] = gen_reg_rtx (V32QImode);
44453 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44454 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44457 for (i = 0; i < 2; ++i)
44459 if (h[i] && l[i])
44461 op = gen_reg_rtx (V32QImode);
44462 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44463 l[i] = op;
44465 else if (h[i])
44466 l[i] = h[i];
44469 gcc_assert (l[0] && l[1]);
44470 op = d->target;
44471 if (d->vmode != V32QImode)
44472 op = gen_reg_rtx (V32QImode);
44473 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44474 if (op != d->target)
44475 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44476 return true;
44479 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44480 With all of the interface bits taken care of, perform the expansion
44481 in D and return true on success. */
44483 static bool
44484 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44486 /* Try a single instruction expansion. */
44487 if (expand_vec_perm_1 (d))
44488 return true;
44490 /* Try sequences of two instructions. */
44492 if (expand_vec_perm_pshuflw_pshufhw (d))
44493 return true;
44495 if (expand_vec_perm_palignr (d))
44496 return true;
44498 if (expand_vec_perm_interleave2 (d))
44499 return true;
44501 if (expand_vec_perm_broadcast (d))
44502 return true;
44504 if (expand_vec_perm_vpermq_perm_1 (d))
44505 return true;
44507 if (expand_vec_perm_vperm2f128 (d))
44508 return true;
44510 /* Try sequences of three instructions. */
44512 if (expand_vec_perm_2vperm2f128_vshuf (d))
44513 return true;
44515 if (expand_vec_perm_pshufb2 (d))
44516 return true;
44518 if (expand_vec_perm_interleave3 (d))
44519 return true;
44521 if (expand_vec_perm_vperm2f128_vblend (d))
44522 return true;
44524 /* Try sequences of four instructions. */
44526 if (expand_vec_perm_vpshufb2_vpermq (d))
44527 return true;
44529 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44530 return true;
44532 /* ??? Look for narrow permutations whose element orderings would
44533 allow the promotion to a wider mode. */
44535 /* ??? Look for sequences of interleave or a wider permute that place
44536 the data into the correct lanes for a half-vector shuffle like
44537 pshuf[lh]w or vpermilps. */
44539 /* ??? Look for sequences of interleave that produce the desired results.
44540 The combinatorics of punpck[lh] get pretty ugly... */
44542 if (expand_vec_perm_even_odd (d))
44543 return true;
44545 /* Even longer sequences. */
44546 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44547 return true;
44549 return false;
44552 /* If a permutation only uses one operand, make it clear. Returns true
44553 if the permutation references both operands. */
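/* E.g. a V4SImode selector { 4 5 6 7 } references only the second operand;
   it is folded to { 0 1 2 3 } with op0 replaced by op1, and the function
   returns false. */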
44555 static bool
44556 canonicalize_perm (struct expand_vec_perm_d *d)
44558 int i, which, nelt = d->nelt;
44560 for (i = which = 0; i < nelt; ++i)
44561 which |= (d->perm[i] < nelt ? 1 : 2);
44563 d->one_operand_p = true;
44564 switch (which)
44566 default:
44567 gcc_unreachable ();
44569 case 3:
44570 if (!rtx_equal_p (d->op0, d->op1))
44572 d->one_operand_p = false;
44573 break;
44575 /* The elements of PERM do not suggest that only the first operand
44576 is used, but both operands are identical. Allow easier matching
44577 of the permutation by folding the permutation into the single
44578 input vector. */
44579 /* FALLTHRU */
44581 case 2:
44582 for (i = 0; i < nelt; ++i)
44583 d->perm[i] &= nelt - 1;
44584 d->op0 = d->op1;
44585 break;
44587 case 1:
44588 d->op1 = d->op0;
44589 break;
44592 return (which == 3);
44595 bool
44596 ix86_expand_vec_perm_const (rtx operands[4])
44598 struct expand_vec_perm_d d;
44599 unsigned char perm[MAX_VECT_LEN];
44600 int i, nelt;
44601 bool two_args;
44602 rtx sel;
44604 d.target = operands[0];
44605 d.op0 = operands[1];
44606 d.op1 = operands[2];
44607 sel = operands[3];
44609 d.vmode = GET_MODE (d.target);
44610 gcc_assert (VECTOR_MODE_P (d.vmode));
44611 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44612 d.testing_p = false;
44614 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44615 gcc_assert (XVECLEN (sel, 0) == nelt);
44616 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44618 for (i = 0; i < nelt; ++i)
44620 rtx e = XVECEXP (sel, 0, i);
44621 int ei = INTVAL (e) & (2 * nelt - 1);
44622 d.perm[i] = ei;
44623 perm[i] = ei;
44626 two_args = canonicalize_perm (&d);
44628 if (ix86_expand_vec_perm_const_1 (&d))
44629 return true;
44631 /* If the selector says both arguments are needed, but the operands are the
44632 same, the above tried to expand with one_operand_p and flattened selector.
44633 If that didn't work, retry without one_operand_p; we succeeded with that
44634 during testing. */
44635 if (two_args && d.one_operand_p)
44637 d.one_operand_p = false;
44638 memcpy (d.perm, perm, sizeof (perm));
44639 return ix86_expand_vec_perm_const_1 (&d);
44642 return false;
44645 /* Implement targetm.vectorize.vec_perm_const_ok. */
44647 static bool
44648 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44649 const unsigned char *sel)
44651 struct expand_vec_perm_d d;
44652 unsigned int i, nelt, which;
44653 bool ret;
44655 d.vmode = vmode;
44656 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44657 d.testing_p = true;
44659 /* Given sufficient ISA support we can just return true here
44660 for selected vector modes. */
44661 if (d.vmode == V16SImode || d.vmode == V16SFmode
44662 || d.vmode == V8DFmode || d.vmode == V8DImode)
44663 /* All implementable with a single vpermi2 insn. */
44664 return true;
44665 if (GET_MODE_SIZE (d.vmode) == 16)
44667 /* All implementable with a single vpperm insn. */
44668 if (TARGET_XOP)
44669 return true;
44670 /* All implementable with 2 pshufb + 1 ior. */
44671 if (TARGET_SSSE3)
44672 return true;
44673 /* All implementable with shufpd or unpck[lh]pd. */
44674 if (d.nelt == 2)
44675 return true;
44678 /* Copy the values from the byte selector SEL into the permutation
44679 array in D. */
44680 memcpy (d.perm, sel, nelt);
44681 for (i = which = 0; i < nelt; ++i)
44683 unsigned char e = d.perm[i];
44684 gcc_assert (e < 2 * nelt);
44685 which |= (e < nelt ? 1 : 2);
44688 /* If all elements are from the second vector, fold them onto the first. */
44689 if (which == 2)
44690 for (i = 0; i < nelt; ++i)
44691 d.perm[i] -= nelt;
44693 /* Check whether the mask can be applied to the vector type. */
44694 d.one_operand_p = (which != 3);
44696 /* Implementable with shufps or pshufd. */
44697 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44698 return true;
44700 /* Otherwise we have to go through the motions and see if we can
44701 figure out how to generate the requested permutation. */
44702 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44703 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44704 if (!d.one_operand_p)
44705 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44707 start_sequence ();
44708 ret = ix86_expand_vec_perm_const_1 (&d);
44709 end_sequence ();
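/* The trial expansion above goes into a throw-away sequence; only the
   return value matters here. */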
44711 return ret;
44714 void
44715 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44717 struct expand_vec_perm_d d;
44718 unsigned i, nelt;
44720 d.target = targ;
44721 d.op0 = op0;
44722 d.op1 = op1;
44723 d.vmode = GET_MODE (targ);
44724 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44725 d.one_operand_p = false;
44726 d.testing_p = false;
44728 for (i = 0; i < nelt; ++i)
44729 d.perm[i] = i * 2 + odd;
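/* I.e. the selector is { 0 2 4 ... } for even extraction and
   { 1 3 5 ... } for odd extraction. */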
44731 /* We'll either be able to implement the permutation directly... */
44732 if (expand_vec_perm_1 (&d))
44733 return;
44735 /* ... or we use the special-case patterns. */
44736 expand_vec_perm_even_odd_1 (&d, odd);
44739 static void
44740 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44742 struct expand_vec_perm_d d;
44743 unsigned i, nelt, base;
44744 bool ok;
44746 d.target = targ;
44747 d.op0 = op0;
44748 d.op1 = op1;
44749 d.vmode = GET_MODE (targ);
44750 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44751 d.one_operand_p = false;
44752 d.testing_p = false;
44754 base = high_p ? nelt / 2 : 0;
44755 for (i = 0; i < nelt / 2; ++i)
44757 d.perm[i * 2] = i + base;
44758 d.perm[i * 2 + 1] = i + base + nelt;
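/* E.g. a low interleave of two V4SImode operands uses the selector
   { 0 4 1 5 }. */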
44761 /* Note that for AVX this isn't one instruction. */
44762 ok = ix86_expand_vec_perm_const_1 (&d);
44763 gcc_assert (ok);
44767 /* Expand a vector operation CODE for a V*QImode in terms of the
44768 same operation on V*HImode. */
44770 void
44771 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44773 enum machine_mode qimode = GET_MODE (dest);
44774 enum machine_mode himode;
44775 rtx (*gen_il) (rtx, rtx, rtx);
44776 rtx (*gen_ih) (rtx, rtx, rtx);
44777 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44778 struct expand_vec_perm_d d;
44779 bool ok, full_interleave;
44780 bool uns_p = false;
44781 int i;
44783 switch (qimode)
44785 case V16QImode:
44786 himode = V8HImode;
44787 gen_il = gen_vec_interleave_lowv16qi;
44788 gen_ih = gen_vec_interleave_highv16qi;
44789 break;
44790 case V32QImode:
44791 himode = V16HImode;
44792 gen_il = gen_avx2_interleave_lowv32qi;
44793 gen_ih = gen_avx2_interleave_highv32qi;
44794 break;
44795 default:
44796 gcc_unreachable ();
44799 op2_l = op2_h = op2;
44800 switch (code)
44802 case MULT:
44803 /* Unpack data such that we've got a source byte in each low byte of
44804 each word. We don't care what goes into the high byte of each word.
44805 Rather than trying to get zero in there, the most convenient thing is to let
44806 it be a copy of the low byte. */
44807 op2_l = gen_reg_rtx (qimode);
44808 op2_h = gen_reg_rtx (qimode);
44809 emit_insn (gen_il (op2_l, op2, op2));
44810 emit_insn (gen_ih (op2_h, op2, op2));
44811 /* FALLTHRU */
44813 op1_l = gen_reg_rtx (qimode);
44814 op1_h = gen_reg_rtx (qimode);
44815 emit_insn (gen_il (op1_l, op1, op1));
44816 emit_insn (gen_ih (op1_h, op1, op1));
44817 full_interleave = qimode == V16QImode;
44818 break;
44820 case ASHIFT:
44821 case LSHIFTRT:
44822 uns_p = true;
44823 /* FALLTHRU */
44824 case ASHIFTRT:
44825 op1_l = gen_reg_rtx (himode);
44826 op1_h = gen_reg_rtx (himode);
44827 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44828 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44829 full_interleave = true;
44830 break;
44831 default:
44832 gcc_unreachable ();
44835 /* Perform the operation. */
44836 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44837 1, OPTAB_DIRECT);
44838 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44839 1, OPTAB_DIRECT);
44840 gcc_assert (res_l && res_h);
44842 /* Merge the data back into the right place. */
44843 d.target = dest;
44844 d.op0 = gen_lowpart (qimode, res_l);
44845 d.op1 = gen_lowpart (qimode, res_h);
44846 d.vmode = qimode;
44847 d.nelt = GET_MODE_NUNITS (qimode);
44848 d.one_operand_p = false;
44849 d.testing_p = false;
44851 if (full_interleave)
44853 /* For SSE2, we used a full interleave, so the desired
44854 results are in the even elements. */
44855 for (i = 0; i < 32; ++i)
44856 d.perm[i] = i * 2;
44858 else
44860 /* For AVX, the interleave used above was not cross-lane. So the
44861 extraction is of the even elements, but with the second and third quarters swapped.
44862 Happily, that is even one insn shorter than even extraction. */
44863 for (i = 0; i < 32; ++i)
44864 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
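/* For V32QImode this works out to
   { 0 2 ... 14  32 34 ... 46  16 18 ... 30  48 50 ... 62 }. */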
44867 ok = ix86_expand_vec_perm_const_1 (&d);
44868 gcc_assert (ok);
44870 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44871 gen_rtx_fmt_ee (code, qimode, op1, op2));
44874 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44875 if op is CONST_VECTOR with all odd elements equal to their
44876 preceding element. */
44878 static bool
44879 const_vector_equal_evenodd_p (rtx op)
44881 enum machine_mode mode = GET_MODE (op);
44882 int i, nunits = GET_MODE_NUNITS (mode);
44883 if (GET_CODE (op) != CONST_VECTOR
44884 || nunits != CONST_VECTOR_NUNITS (op))
44885 return false;
44886 for (i = 0; i < nunits; i += 2)
44887 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44888 return false;
44889 return true;
44892 void
44893 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44894 bool uns_p, bool odd_p)
44896 enum machine_mode mode = GET_MODE (op1);
44897 enum machine_mode wmode = GET_MODE (dest);
44898 rtx x;
44899 rtx orig_op1 = op1, orig_op2 = op2;
44901 if (!nonimmediate_operand (op1, mode))
44902 op1 = force_reg (mode, op1);
44903 if (!nonimmediate_operand (op2, mode))
44904 op2 = force_reg (mode, op2);
44906 /* We only play even/odd games with vectors of SImode. */
44907 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44909 /* If we're looking for the odd results, shift those members down to
44910 the even slots. For some cpus this is faster than a PSHUFD. */
44911 if (odd_p)
44913 /* For XOP use vpmacsdqh, but only for smult, as it is only
44914 signed. */
44915 if (TARGET_XOP && mode == V4SImode && !uns_p)
44917 x = force_reg (wmode, CONST0_RTX (wmode));
44918 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44919 return;
44922 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44923 if (!const_vector_equal_evenodd_p (orig_op1))
44924 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44925 x, NULL, 1, OPTAB_DIRECT);
44926 if (!const_vector_equal_evenodd_p (orig_op2))
44927 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44928 x, NULL, 1, OPTAB_DIRECT);
44929 op1 = gen_lowpart (mode, op1);
44930 op2 = gen_lowpart (mode, op2);
44933 if (mode == V16SImode)
44935 if (uns_p)
44936 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44937 else
44938 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44940 else if (mode == V8SImode)
44942 if (uns_p)
44943 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44944 else
44945 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44947 else if (uns_p)
44948 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44949 else if (TARGET_SSE4_1)
44950 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44951 else
44953 rtx s1, s2, t0, t1, t2;
44955 /* The easiest way to implement this without PMULDQ is to go through
44956 the motions as if we are performing a full 64-bit multiply, except
44957 that we need to do less shuffling of the elements. */
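/* Roughly, the identity used is: for 32-bit signed a and b,
   sext (a) * sext (b) == a*b - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32)
   modulo 2^64; s1/s2 below are the (x < 0) masks from which the correction
   terms are formed. */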
44959 /* Compute the sign-extension, aka highparts, of the two operands. */
44960 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44961 op1, pc_rtx, pc_rtx);
44962 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44963 op2, pc_rtx, pc_rtx);
44965 /* Multiply LO(A) * HI(B), and vice-versa. */
44966 t1 = gen_reg_rtx (wmode);
44967 t2 = gen_reg_rtx (wmode);
44968 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44969 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44971 /* Multiply LO(A) * LO(B). */
44972 t0 = gen_reg_rtx (wmode);
44973 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44975 /* Combine and shift the highparts into place. */
44976 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44977 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44978 1, OPTAB_DIRECT);
44980 /* Combine high and low parts. */
44981 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44982 return;
44984 emit_insn (x);
44987 void
44988 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44989 bool uns_p, bool high_p)
44991 enum machine_mode wmode = GET_MODE (dest);
44992 enum machine_mode mode = GET_MODE (op1);
44993 rtx t1, t2, t3, t4, mask;
44995 switch (mode)
44997 case V4SImode:
44998 t1 = gen_reg_rtx (mode);
44999 t2 = gen_reg_rtx (mode);
45000 if (TARGET_XOP && !uns_p)
45002 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45003 shuffle the elements once so that all elements are in the right
45004 place for immediate use: { A C B D }. */
45005 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45006 const1_rtx, GEN_INT (3)));
45007 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45008 const1_rtx, GEN_INT (3)));
45010 else
45012 /* Put the elements into place for the multiply. */
45013 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45014 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45015 high_p = false;
45017 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45018 break;
45020 case V8SImode:
45021 /* Shuffle the elements between the lanes. After this we
45022 have { A B E F | C D G H } for each operand. */
45023 t1 = gen_reg_rtx (V4DImode);
45024 t2 = gen_reg_rtx (V4DImode);
45025 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45026 const0_rtx, const2_rtx,
45027 const1_rtx, GEN_INT (3)));
45028 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45029 const0_rtx, const2_rtx,
45030 const1_rtx, GEN_INT (3)));
45032 /* Shuffle the elements within the lanes. After this we
45033 have { A A B B | C C D D } or { E E F F | G G H H }. */
45034 t3 = gen_reg_rtx (V8SImode);
45035 t4 = gen_reg_rtx (V8SImode);
45036 mask = GEN_INT (high_p
45037 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45038 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45039 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45040 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45042 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45043 break;
45045 case V8HImode:
45046 case V16HImode:
45047 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45048 uns_p, OPTAB_DIRECT);
45049 t2 = expand_binop (mode,
45050 uns_p ? umul_highpart_optab : smul_highpart_optab,
45051 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45052 gcc_assert (t1 && t2);
45054 t3 = gen_reg_rtx (mode);
45055 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45056 emit_move_insn (dest, gen_lowpart (wmode, t3));
45057 break;
45059 case V16QImode:
45060 case V32QImode:
45061 t1 = gen_reg_rtx (wmode);
45062 t2 = gen_reg_rtx (wmode);
45063 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45064 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45066 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45067 break;
45069 default:
45070 gcc_unreachable ();
45074 void
45075 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45077 rtx res_1, res_2, res_3, res_4;
45079 res_1 = gen_reg_rtx (V4SImode);
45080 res_2 = gen_reg_rtx (V4SImode);
45081 res_3 = gen_reg_rtx (V2DImode);
45082 res_4 = gen_reg_rtx (V2DImode);
45083 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45084 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45086 /* Move the results in element 2 down to element 1; we don't care
45087 what goes in elements 2 and 3. Then we can merge the parts
45088 back together with an interleave.
45090 Note that two other sequences were tried:
45091 (1) Use interleaves at the start instead of psrldq, which allows
45092 us to use a single shufps to merge things back at the end.
45093 (2) Use shufps here to combine the two vectors, then pshufd to
45094 put the elements in the correct order.
45095 In both cases the cost of the reformatting stall was too high
45096 and the overall sequence slower. */
45098 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45099 const0_rtx, const2_rtx,
45100 const0_rtx, const0_rtx));
45101 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45102 const0_rtx, const2_rtx,
45103 const0_rtx, const0_rtx));
45104 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45106 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45109 void
45110 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45112 enum machine_mode mode = GET_MODE (op0);
45113 rtx t1, t2, t3, t4, t5, t6;
45115 if (TARGET_XOP && mode == V2DImode)
45117 /* op1: A,B,C,D, op2: E,F,G,H */
45118 op1 = gen_lowpart (V4SImode, op1);
45119 op2 = gen_lowpart (V4SImode, op2);
45121 t1 = gen_reg_rtx (V4SImode);
45122 t2 = gen_reg_rtx (V4SImode);
45123 t3 = gen_reg_rtx (V2DImode);
45124 t4 = gen_reg_rtx (V2DImode);
45126 /* t1: B,A,D,C */
45127 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45128 GEN_INT (1),
45129 GEN_INT (0),
45130 GEN_INT (3),
45131 GEN_INT (2)));
45133 /* t2: (B*E),(A*F),(D*G),(C*H) */
45134 emit_insn (gen_mulv4si3 (t2, t1, op2));
45136 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45137 emit_insn (gen_xop_phadddq (t3, t2));
45139 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45140 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45142 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45143 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45145 else
45147 enum machine_mode nmode;
45148 rtx (*umul) (rtx, rtx, rtx);
45150 if (mode == V2DImode)
45152 umul = gen_vec_widen_umult_even_v4si;
45153 nmode = V4SImode;
45155 else if (mode == V4DImode)
45157 umul = gen_vec_widen_umult_even_v8si;
45158 nmode = V8SImode;
45160 else if (mode == V8DImode)
45162 umul = gen_vec_widen_umult_even_v16si;
45163 nmode = V16SImode;
45165 else
45166 gcc_unreachable ();
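/* Writing each 64-bit element as hi*2^32 + lo, the product is
   (hi1*2^32 + lo1) * (hi2*2^32 + lo2)
   == lo1*lo2 + ((hi1*lo2 + hi2*lo1) << 32)  (mod 2^64),
   which needs only the widening unsigned 32x32->64 multiply. */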
45169 /* Multiply low parts. */
45170 t1 = gen_reg_rtx (mode);
45171 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45173 /* Shift input vectors right 32 bits so we can multiply high parts. */
45174 t6 = GEN_INT (32);
45175 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45176 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45178 /* Multiply high parts by low parts. */
45179 t4 = gen_reg_rtx (mode);
45180 t5 = gen_reg_rtx (mode);
45181 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45182 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45184 /* Combine and shift the highparts back. */
45185 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45186 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45188 /* Combine high and low parts. */
45189 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45192 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45193 gen_rtx_MULT (mode, op1, op2));
45196 /* Calculate integer abs() using only SSE2 instructions. */
45198 void
45199 ix86_expand_sse2_abs (rtx target, rtx input)
45201 enum machine_mode mode = GET_MODE (target);
45202 rtx tmp0, tmp1, x;
45204 switch (mode)
45206 /* For 32-bit signed integer X, the best way to calculate the absolute
45207 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
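/* E.g. for X = -5: X >> 31 == -1, -5 ^ -1 == 4, and 4 - (-1) == 5. */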
45208 case V4SImode:
45209 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45210 GEN_INT (GET_MODE_BITSIZE
45211 (GET_MODE_INNER (mode)) - 1),
45212 NULL, 0, OPTAB_DIRECT);
45213 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45214 NULL, 0, OPTAB_DIRECT);
45215 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45216 target, 0, OPTAB_DIRECT);
45217 break;
45219 /* For 16-bit signed integer X, the best way to calculate the absolute
45220 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45221 case V8HImode:
45222 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45224 x = expand_simple_binop (mode, SMAX, tmp0, input,
45225 target, 0, OPTAB_DIRECT);
45226 break;
45228 /* For 8-bit signed integer X, the best way to calculate the absolute
45229 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45230 as SSE2 provides the PMINUB insn. */
45231 case V16QImode:
45232 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45234 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45235 target, 0, OPTAB_DIRECT);
45236 break;
45238 default:
45239 gcc_unreachable ();
45242 if (x != target)
45243 emit_move_insn (target, x);
45246 /* Expand an insert into a vector register through pinsr insn.
45247 Return true if successful. */
45249 bool
45250 ix86_expand_pinsr (rtx *operands)
45252 rtx dst = operands[0];
45253 rtx src = operands[3];
45255 unsigned int size = INTVAL (operands[1]);
45256 unsigned int pos = INTVAL (operands[2]);
45258 if (GET_CODE (dst) == SUBREG)
45260 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45261 dst = SUBREG_REG (dst);
45264 if (GET_CODE (src) == SUBREG)
45265 src = SUBREG_REG (src);
45267 switch (GET_MODE (dst))
45269 case V16QImode:
45270 case V8HImode:
45271 case V4SImode:
45272 case V2DImode:
45274 enum machine_mode srcmode, dstmode;
45275 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45277 srcmode = mode_for_size (size, MODE_INT, 0);
45279 switch (srcmode)
45281 case QImode:
45282 if (!TARGET_SSE4_1)
45283 return false;
45284 dstmode = V16QImode;
45285 pinsr = gen_sse4_1_pinsrb;
45286 break;
45288 case HImode:
45289 if (!TARGET_SSE2)
45290 return false;
45291 dstmode = V8HImode;
45292 pinsr = gen_sse2_pinsrw;
45293 break;
45295 case SImode:
45296 if (!TARGET_SSE4_1)
45297 return false;
45298 dstmode = V4SImode;
45299 pinsr = gen_sse4_1_pinsrd;
45300 break;
45302 case DImode:
45303 gcc_assert (TARGET_64BIT);
45304 if (!TARGET_SSE4_1)
45305 return false;
45306 dstmode = V2DImode;
45307 pinsr = gen_sse4_1_pinsrq;
45308 break;
45310 default:
45311 return false;
45314 rtx d = dst;
45315 if (GET_MODE (dst) != dstmode)
45316 d = gen_reg_rtx (dstmode);
45317 src = gen_lowpart (srcmode, src);
45319 pos /= size;
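/* E.g. a 16-bit insert at bit offset 32 of a V8HImode destination gives
   element index 2, so the pattern is handed the selector GEN_INT (1 << 2). */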
45321 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45322 GEN_INT (1 << pos)));
45323 if (d != dst)
45324 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45325 return true;
45328 default:
45329 return false;
45333 /* This function returns the calling-ABI-specific va_list type node.
45334 It returns the va_list type specific to FNDECL. */
45336 static tree
45337 ix86_fn_abi_va_list (tree fndecl)
45339 if (!TARGET_64BIT)
45340 return va_list_type_node;
45341 gcc_assert (fndecl != NULL_TREE);
45343 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45344 return ms_va_list_type_node;
45345 else
45346 return sysv_va_list_type_node;
45349 /* Returns the canonical va_list type specified by TYPE. If there
45350 is no valid TYPE provided, it returns NULL_TREE. */
45352 static tree
45353 ix86_canonical_va_list_type (tree type)
45355 tree wtype, htype;
45357 /* Resolve references and pointers to va_list type. */
45358 if (TREE_CODE (type) == MEM_REF)
45359 type = TREE_TYPE (type);
45360 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45361 type = TREE_TYPE (type);
45362 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45363 type = TREE_TYPE (type);
45365 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45367 wtype = va_list_type_node;
45368 gcc_assert (wtype != NULL_TREE);
45369 htype = type;
45370 if (TREE_CODE (wtype) == ARRAY_TYPE)
45372 /* If va_list is an array type, the argument may have decayed
45373 to a pointer type, e.g. by being passed to another function.
45374 In that case, unwrap both types so that we can compare the
45375 underlying records. */
45376 if (TREE_CODE (htype) == ARRAY_TYPE
45377 || POINTER_TYPE_P (htype))
45379 wtype = TREE_TYPE (wtype);
45380 htype = TREE_TYPE (htype);
45383 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45384 return va_list_type_node;
45385 wtype = sysv_va_list_type_node;
45386 gcc_assert (wtype != NULL_TREE);
45387 htype = type;
45388 if (TREE_CODE (wtype) == ARRAY_TYPE)
45390 /* If va_list is an array type, the argument may have decayed
45391 to a pointer type, e.g. by being passed to another function.
45392 In that case, unwrap both types so that we can compare the
45393 underlying records. */
45394 if (TREE_CODE (htype) == ARRAY_TYPE
45395 || POINTER_TYPE_P (htype))
45397 wtype = TREE_TYPE (wtype);
45398 htype = TREE_TYPE (htype);
45401 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45402 return sysv_va_list_type_node;
45403 wtype = ms_va_list_type_node;
45404 gcc_assert (wtype != NULL_TREE);
45405 htype = type;
45406 if (TREE_CODE (wtype) == ARRAY_TYPE)
45408 /* If va_list is an array type, the argument may have decayed
45409 to a pointer type, e.g. by being passed to another function.
45410 In that case, unwrap both types so that we can compare the
45411 underlying records. */
45412 if (TREE_CODE (htype) == ARRAY_TYPE
45413 || POINTER_TYPE_P (htype))
45415 wtype = TREE_TYPE (wtype);
45416 htype = TREE_TYPE (htype);
45419 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45420 return ms_va_list_type_node;
45421 return NULL_TREE;
45423 return std_canonical_va_list_type (type);
45426 /* Iterate through the target-specific builtin types for va_list.
45427 IDX denotes the iterator, *PTREE is set to the result type of
45428 the va_list builtin, and *PNAME to its internal type.
45429 Returns zero if there is no element for this index, otherwise
45430 IDX should be increased upon the next call.
45431 Note, do not iterate a base builtin's name like __builtin_va_list.
45432 Used from c_common_nodes_and_builtins. */
45434 static int
45435 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45437 if (TARGET_64BIT)
45439 switch (idx)
45441 default:
45442 break;
45444 case 0:
45445 *ptree = ms_va_list_type_node;
45446 *pname = "__builtin_ms_va_list";
45447 return 1;
45449 case 1:
45450 *ptree = sysv_va_list_type_node;
45451 *pname = "__builtin_sysv_va_list";
45452 return 1;
45456 return 0;
45459 #undef TARGET_SCHED_DISPATCH
45460 #define TARGET_SCHED_DISPATCH has_dispatch
45461 #undef TARGET_SCHED_DISPATCH_DO
45462 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45463 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45464 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45465 #undef TARGET_SCHED_REORDER
45466 #define TARGET_SCHED_REORDER ix86_sched_reorder
45467 #undef TARGET_SCHED_ADJUST_PRIORITY
45468 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45469 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45470 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45471 ix86_dependencies_evaluation_hook
45473 /* The size of the dispatch window is the total number of bytes of
45474 object code allowed in a window. */
45475 #define DISPATCH_WINDOW_SIZE 16
45477 /* Number of dispatch windows considered for scheduling. */
45478 #define MAX_DISPATCH_WINDOWS 3
45480 /* Maximum number of instructions in a window. */
45481 #define MAX_INSN 4
45483 /* Maximum number of immediate operands in a window. */
45484 #define MAX_IMM 4
45486 /* Maximum number of immediate bits allowed in a window. */
45487 #define MAX_IMM_SIZE 128
45489 /* Maximum number of 32 bit immediates allowed in a window. */
45490 #define MAX_IMM_32 4
45492 /* Maximum number of 64 bit immediates allowed in a window. */
45493 #define MAX_IMM_64 2
45495 /* Maximum total of loads or prefetches allowed in a window. */
45496 #define MAX_LOAD 2
45498 /* Maximum total of stores allowed in a window. */
45499 #define MAX_STORE 1
45501 #undef BIG
45502 #define BIG 100
45505 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45506 enum dispatch_group {
45507 disp_no_group = 0,
45508 disp_load,
45509 disp_store,
45510 disp_load_store,
45511 disp_prefetch,
45512 disp_imm,
45513 disp_imm_32,
45514 disp_imm_64,
45515 disp_branch,
45516 disp_cmp,
45517 disp_jcc,
45518 disp_last
45521 /* Number of allowable groups in a dispatch window. It is an array
45522 indexed by dispatch_group enum. 100 is used as a big number,
45523 because the number of these kinds of operations does not have any
45524 effect on the dispatch window, but we need them for other reasons in
45525 the table. */
45526 static unsigned int num_allowable_groups[disp_last] = {
45527 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45530 char group_name[disp_last + 1][16] = {
45531 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45532 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45533 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45536 /* Instruction path. */
45537 enum insn_path {
45538 no_path = 0,
45539 path_single, /* Single micro op. */
45540 path_double, /* Double micro op. */
45541 path_multi, /* Instructions with more than 2 micro ops. */
45542 last_path
45545 /* sched_insn_info defines a window to the instructions scheduled in
45546 the basic block. It contains a pointer to the insn_info table and
45547 the instruction scheduled.
45549 Windows are allocated for each basic block and are linked
45550 together. */
45551 typedef struct sched_insn_info_s {
45552 rtx insn;
45553 enum dispatch_group group;
45554 enum insn_path path;
45555 int byte_len;
45556 int imm_bytes;
45557 } sched_insn_info;
45559 /* Linked list of dispatch windows. This is a two-way list of
45560 dispatch windows of a basic block. It contains information about
45561 the number of uops in the window and the total number of
45562 instructions and of bytes in the object code for this dispatch
45563 window. */
45564 typedef struct dispatch_windows_s {
45565 int num_insn; /* Number of insn in the window. */
45566 int num_uops; /* Number of uops in the window. */
45567 int window_size; /* Number of bytes in the window. */
45568 int window_num; /* Window number, either 0 or 1. */
45569 int num_imm; /* Number of immediates in an insn. */
45570 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45571 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45572 int imm_size; /* Total immediates in the window. */
45573 int num_loads; /* Total memory loads in the window. */
45574 int num_stores; /* Total memory stores in the window. */
45575 int violation; /* Violation exists in window. */
45576 sched_insn_info *window; /* Pointer to the window. */
45577 struct dispatch_windows_s *next;
45578 struct dispatch_windows_s *prev;
45579 } dispatch_windows;
45581 /* Immediate values used in an insn. */
45582 typedef struct imm_info_s
45584 int imm;
45585 int imm32;
45586 int imm64;
45587 } imm_info;
45589 static dispatch_windows *dispatch_window_list;
45590 static dispatch_windows *dispatch_window_list1;
45592 /* Get dispatch group of insn. */
45594 static enum dispatch_group
45595 get_mem_group (rtx insn)
45597 enum attr_memory memory;
45599 if (INSN_CODE (insn) < 0)
45600 return disp_no_group;
45601 memory = get_attr_memory (insn);
45602 if (memory == MEMORY_STORE)
45603 return disp_store;
45605 if (memory == MEMORY_LOAD)
45606 return disp_load;
45608 if (memory == MEMORY_BOTH)
45609 return disp_load_store;
45611 return disp_no_group;
45614 /* Return true if insn is a compare instruction. */
45616 static bool
45617 is_cmp (rtx insn)
45619 enum attr_type type;
45621 type = get_attr_type (insn);
45622 return (type == TYPE_TEST
45623 || type == TYPE_ICMP
45624 || type == TYPE_FCMP
45625 || GET_CODE (PATTERN (insn)) == COMPARE);
45628 /* Return true if a dispatch violation was encountered. */
45630 static bool
45631 dispatch_violation (void)
45633 if (dispatch_window_list->next)
45634 return dispatch_window_list->next->violation;
45635 return dispatch_window_list->violation;
45638 /* Return true if insn is a branch instruction. */
45640 static bool
45641 is_branch (rtx insn)
45643 return (CALL_P (insn) || JUMP_P (insn));
45646 /* Return true if insn is a prefetch instruction. */
45648 static bool
45649 is_prefetch (rtx insn)
45651 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45654 /* This function initializes a dispatch window and the list container holding a
45655 pointer to the window. */
45657 static void
45658 init_window (int window_num)
45660 int i;
45661 dispatch_windows *new_list;
45663 if (window_num == 0)
45664 new_list = dispatch_window_list;
45665 else
45666 new_list = dispatch_window_list1;
45668 new_list->num_insn = 0;
45669 new_list->num_uops = 0;
45670 new_list->window_size = 0;
45671 new_list->next = NULL;
45672 new_list->prev = NULL;
45673 new_list->window_num = window_num;
45674 new_list->num_imm = 0;
45675 new_list->num_imm_32 = 0;
45676 new_list->num_imm_64 = 0;
45677 new_list->imm_size = 0;
45678 new_list->num_loads = 0;
45679 new_list->num_stores = 0;
45680 new_list->violation = false;
45682 for (i = 0; i < MAX_INSN; i++)
45684 new_list->window[i].insn = NULL;
45685 new_list->window[i].group = disp_no_group;
45686 new_list->window[i].path = no_path;
45687 new_list->window[i].byte_len = 0;
45688 new_list->window[i].imm_bytes = 0;
45690 return;
45693 /* This function allocates and initializes a dispatch window and the
45694 list container holding a pointer to the window. */
45696 static dispatch_windows *
45697 allocate_window (void)
45699 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45700 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45702 return new_list;
45705 /* This routine initializes the dispatch scheduling information. It
45706 initiates building dispatch scheduler tables and constructs the
45707 first dispatch window. */
45709 static void
45710 init_dispatch_sched (void)
45712 /* Allocate a dispatch list and a window. */
45713 dispatch_window_list = allocate_window ();
45714 dispatch_window_list1 = allocate_window ();
45715 init_window (0);
45716 init_window (1);
45719 /* This function returns true if a branch is detected. End of a basic block
45720 does not have to be a branch, but here we assume only branches end a
45721 window. */
45723 static bool
45724 is_end_basic_block (enum dispatch_group group)
45726 return group == disp_branch;
45729 /* This function is called when the end of a window processing is reached. */
45731 static void
45732 process_end_window (void)
45734 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45735 if (dispatch_window_list->next)
45737 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45738 gcc_assert (dispatch_window_list->window_size
45739 + dispatch_window_list1->window_size <= 48);
45740 init_window (1);
45742 init_window (0);
45745 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45746 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45747 for 48 bytes of instructions. Note that these windows are not dispatch
45748 windows whose size is DISPATCH_WINDOW_SIZE. */
45750 static dispatch_windows *
45751 allocate_next_window (int window_num)
45753 if (window_num == 0)
45755 if (dispatch_window_list->next)
45756 init_window (1);
45757 init_window (0);
45758 return dispatch_window_list;
45761 dispatch_window_list->next = dispatch_window_list1;
45762 dispatch_window_list1->prev = dispatch_window_list;
45764 return dispatch_window_list1;
45767 /* Increment the number of immediate operands of an instruction. */
45769 static int
45770 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45772 if (*in_rtx == 0)
45773 return 0;
45775 switch (GET_CODE (*in_rtx))
45777 case CONST:
45778 case SYMBOL_REF:
45779 case CONST_INT:
45780 (imm_values->imm)++;
45781 if (x86_64_immediate_operand (*in_rtx, SImode))
45782 (imm_values->imm32)++;
45783 else
45784 (imm_values->imm64)++;
45785 break;
45787 case CONST_DOUBLE:
45788 (imm_values->imm)++;
45789 (imm_values->imm64)++;
45790 break;
45792 case CODE_LABEL:
45793 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45795 (imm_values->imm)++;
45796 (imm_values->imm32)++;
45798 break;
45800 default:
45801 break;
45804 return 0;
45807 /* Compute number of immediate operands of an instruction. */
45809 static void
45810 find_constant (rtx in_rtx, imm_info *imm_values)
45812 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45813 (rtx_function) find_constant_1, (void *) imm_values);
45816 /* Return total size of immediate operands of an instruction along with the
45817 number of corresponding immediate operands. It initializes its parameters
45818 to zero before calling FIND_CONSTANT.
45819 INSN is the input instruction. IMM is the total of immediates.
45820 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45821 bit immediates. */
45823 static int
45824 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45826 imm_info imm_values = {0, 0, 0};
45828 find_constant (insn, &imm_values);
45829 *imm = imm_values.imm;
45830 *imm32 = imm_values.imm32;
45831 *imm64 = imm_values.imm64;
45832 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45835 /* This function indicates whether an instruction has an immediate
45836 operand. */
45838 static bool
45839 has_immediate (rtx insn)
45841 int num_imm_operand;
45842 int num_imm32_operand;
45843 int num_imm64_operand;
45845 if (insn)
45846 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45847 &num_imm64_operand);
45848 return false;
45851 /* Return single or double path for instructions. */
45853 static enum insn_path
45854 get_insn_path (rtx insn)
45856 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45858 if ((int)path == 0)
45859 return path_single;
45861 if ((int)path == 1)
45862 return path_double;
45864 return path_multi;
45867 /* Return insn dispatch group. */
45869 static enum dispatch_group
45870 get_insn_group (rtx insn)
45872 enum dispatch_group group = get_mem_group (insn);
45873 if (group)
45874 return group;
45876 if (is_branch (insn))
45877 return disp_branch;
45879 if (is_cmp (insn))
45880 return disp_cmp;
45882 if (has_immediate (insn))
45883 return disp_imm;
45885 if (is_prefetch (insn))
45886 return disp_prefetch;
45888 return disp_no_group;
45891 /* Count number of GROUP restricted instructions in a dispatch
45892 window WINDOW_LIST. */
45894 static int
45895 count_num_restricted (rtx insn, dispatch_windows *window_list)
45897 enum dispatch_group group = get_insn_group (insn);
45898 int imm_size;
45899 int num_imm_operand;
45900 int num_imm32_operand;
45901 int num_imm64_operand;
45903 if (group == disp_no_group)
45904 return 0;
45906 if (group == disp_imm)
45908 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45909 &num_imm64_operand);
45910 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45911 || num_imm_operand + window_list->num_imm > MAX_IMM
45912 || (num_imm32_operand > 0
45913 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45914 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45915 || (num_imm64_operand > 0
45916 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45917 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45918 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45919 && num_imm64_operand > 0
45920 && ((window_list->num_imm_64 > 0
45921 && window_list->num_insn >= 2)
45922 || window_list->num_insn >= 3)))
45923 return BIG;
45925 return 1;
45928 if ((group == disp_load_store
45929 && (window_list->num_loads >= MAX_LOAD
45930 || window_list->num_stores >= MAX_STORE))
45931 || ((group == disp_load
45932 || group == disp_prefetch)
45933 && window_list->num_loads >= MAX_LOAD)
45934 || (group == disp_store
45935 && window_list->num_stores >= MAX_STORE))
45936 return BIG;
45938 return 1;
45941 /* This function returns true if insn satisfies dispatch rules on the
45942 last window scheduled. */
45944 static bool
45945 fits_dispatch_window (rtx insn)
45947 dispatch_windows *window_list = dispatch_window_list;
45948 dispatch_windows *window_list_next = dispatch_window_list->next;
45949 unsigned int num_restrict;
45950 enum dispatch_group group = get_insn_group (insn);
45951 enum insn_path path = get_insn_path (insn);
45952 int sum;
45954 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45955 instructions should be given the lowest priority in the
45956 scheduling process in the Haifa scheduler to make sure they will be
45957 scheduled in the same dispatch window as the reference to them. */
45958 if (group == disp_jcc || group == disp_cmp)
45959 return false;
45961 /* Check nonrestricted. */
45962 if (group == disp_no_group || group == disp_branch)
45963 return true;
45965 /* Get last dispatch window. */
45966 if (window_list_next)
45967 window_list = window_list_next;
45969 if (window_list->window_num == 1)
45971 sum = window_list->prev->window_size + window_list->window_size;
45973 if (sum == 32
45974 || (min_insn_size (insn) + sum) >= 48)
45975 /* Window 1 is full. Go for next window. */
45976 return true;
45979 num_restrict = count_num_restricted (insn, window_list);
45981 if (num_restrict > num_allowable_groups[group])
45982 return false;
45984 /* See if it fits in the first window. */
45985 if (window_list->window_num == 0)
45987 /* The first window should have only single- and double-path
45988 uops. */
45989 if (path == path_double
45990 && (window_list->num_uops + 2) > MAX_INSN)
45991 return false;
45992 else if (path != path_single)
45993 return false;
45995 return true;
45998 /* Add an instruction INSN with NUM_UOPS micro-operations to the
45999 dispatch window WINDOW_LIST. */
46001 static void
46002 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46004 int byte_len = min_insn_size (insn);
46005 int num_insn = window_list->num_insn;
46006 int imm_size;
46007 sched_insn_info *window = window_list->window;
46008 enum dispatch_group group = get_insn_group (insn);
46009 enum insn_path path = get_insn_path (insn);
46010 int num_imm_operand;
46011 int num_imm32_operand;
46012 int num_imm64_operand;
46014 if (!window_list->violation && group != disp_cmp
46015 && !fits_dispatch_window (insn))
46016 window_list->violation = true;
46018 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46019 &num_imm64_operand);
46021 /* Initialize window with new instruction. */
46022 window[num_insn].insn = insn;
46023 window[num_insn].byte_len = byte_len;
46024 window[num_insn].group = group;
46025 window[num_insn].path = path;
46026 window[num_insn].imm_bytes = imm_size;
46028 window_list->window_size += byte_len;
46029 window_list->num_insn = num_insn + 1;
46030 window_list->num_uops = window_list->num_uops + num_uops;
46031 window_list->imm_size += imm_size;
46032 window_list->num_imm += num_imm_operand;
46033 window_list->num_imm_32 += num_imm32_operand;
46034 window_list->num_imm_64 += num_imm64_operand;
46036 if (group == disp_store)
46037 window_list->num_stores += 1;
46038 else if (group == disp_load
46039 || group == disp_prefetch)
46040 window_list->num_loads += 1;
46041 else if (group == disp_load_store)
46043 window_list->num_stores += 1;
46044 window_list->num_loads += 1;
46048 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46049 If the total bytes of instructions or the number of instructions in
46050 the window exceeds the allowable limit, it allocates a new window. */
46052 static void
46053 add_to_dispatch_window (rtx insn)
46055 int byte_len;
46056 dispatch_windows *window_list;
46057 dispatch_windows *next_list;
46058 dispatch_windows *window0_list;
46059 enum insn_path path;
46060 enum dispatch_group insn_group;
46061 bool insn_fits;
46062 int num_insn;
46063 int num_uops;
46064 int window_num;
46065 int insn_num_uops;
46066 int sum;
46068 if (INSN_CODE (insn) < 0)
46069 return;
46071 byte_len = min_insn_size (insn);
46072 window_list = dispatch_window_list;
46073 next_list = window_list->next;
46074 path = get_insn_path (insn);
46075 insn_group = get_insn_group (insn);
46077 /* Get the last dispatch window. */
46078 if (next_list)
46079 window_list = dispatch_window_list->next;
46081 if (path == path_single)
46082 insn_num_uops = 1;
46083 else if (path == path_double)
46084 insn_num_uops = 2;
46085 else
46086 insn_num_uops = (int) path;
46088 /* If the current window is full, get a new window.
46089 Window number zero is full if MAX_INSN uops are scheduled in it.
46090 Window number one is full if window zero's bytes plus window
46091 one's bytes equal 32, or if adding the bytes of the new
46092 instruction makes the total greater than or equal to 48, or if it
46093 already has MAX_INSN instructions in it. */
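  /* Editor's worked example (illustrative, not part of the original
     source): if window 0 already holds 18 bytes and window 1 holds 14,
     their sum is 32 and window 1 is treated as full; similarly, a
     5-byte instruction arriving when the sum is 44 would bring it to
     49 >= 48, so the window pair is ended and a fresh window is
     started.  */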
46094 num_insn = window_list->num_insn;
46095 num_uops = window_list->num_uops;
46096 window_num = window_list->window_num;
46097 insn_fits = fits_dispatch_window (insn);
46099 if (num_insn >= MAX_INSN
46100 || num_uops + insn_num_uops > MAX_INSN
46101 || !(insn_fits))
46103 window_num = ~window_num & 1;
46104 window_list = allocate_next_window (window_num);
46107 if (window_num == 0)
46109 add_insn_window (insn, window_list, insn_num_uops);
46110 if (window_list->num_insn >= MAX_INSN
46111 && insn_group == disp_branch)
46113 process_end_window ();
46114 return;
46117 else if (window_num == 1)
46119 window0_list = window_list->prev;
46120 sum = window0_list->window_size + window_list->window_size;
46121 if (sum == 32
46122 || (byte_len + sum) >= 48)
46124 process_end_window ();
46125 window_list = dispatch_window_list;
46128 add_insn_window (insn, window_list, insn_num_uops);
46130 else
46131 gcc_unreachable ();
46133 if (is_end_basic_block (insn_group))
46135 /* End of basic block is reached; do end-of-basic-block processing. */
46136 process_end_window ();
46137 return;
46141 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46143 DEBUG_FUNCTION static void
46144 debug_dispatch_window_file (FILE *file, int window_num)
46146 dispatch_windows *list;
46147 int i;
46149 if (window_num == 0)
46150 list = dispatch_window_list;
46151 else
46152 list = dispatch_window_list1;
46154 fprintf (file, "Window #%d:\n", list->window_num);
46155 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46156 list->num_insn, list->num_uops, list->window_size);
46157 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46158 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46160 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46161 list->num_stores);
46162 fprintf (file, " insn info:\n");
46164 for (i = 0; i < MAX_INSN; i++)
46166 if (!list->window[i].insn)
46167 break;
46168 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46169 i, group_name[list->window[i].group],
46170 i, (void *)list->window[i].insn,
46171 i, list->window[i].path,
46172 i, list->window[i].byte_len,
46173 i, list->window[i].imm_bytes);
46177 /* Print to stdout a dispatch window. */
46179 DEBUG_FUNCTION void
46180 debug_dispatch_window (int window_num)
46182 debug_dispatch_window_file (stdout, window_num);
46185 /* Print INSN dispatch information to FILE. */
46187 DEBUG_FUNCTION static void
46188 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46190 int byte_len;
46191 enum insn_path path;
46192 enum dispatch_group group;
46193 int imm_size;
46194 int num_imm_operand;
46195 int num_imm32_operand;
46196 int num_imm64_operand;
46198 if (INSN_CODE (insn) < 0)
46199 return;
46201 byte_len = min_insn_size (insn);
46202 path = get_insn_path (insn);
46203 group = get_insn_group (insn);
46204 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46205 &num_imm64_operand);
46207 fprintf (file, " insn info:\n");
46208 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46209 group_name[group], path, byte_len);
46210 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46211 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46214 /* Print to STDOUT the status of the ready list with respect to
46215 dispatch windows. */
46217 DEBUG_FUNCTION void
46218 debug_ready_dispatch (void)
46220 int i;
46221 int no_ready = number_in_ready ();
46223 fprintf (stdout, "Number of ready: %d\n", no_ready);
46225 for (i = 0; i < no_ready; i++)
46226 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46229 /* This routine is the driver of the dispatch scheduler. */
46231 static void
46232 do_dispatch (rtx insn, int mode)
46234 if (mode == DISPATCH_INIT)
46235 init_dispatch_sched ();
46236 else if (mode == ADD_TO_DISPATCH_WINDOW)
46237 add_to_dispatch_window (insn);
46240 /* Return TRUE if Dispatch Scheduling is supported. */
46242 static bool
46243 has_dispatch (rtx insn, int action)
46245 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46246 && flag_dispatch_scheduler)
46247 switch (action)
46249 default:
46250 return false;
46252 case IS_DISPATCH_ON:
46253 return true;
46254 break;
46256 case IS_CMP:
46257 return is_cmp (insn);
46259 case DISPATCH_VIOLATION:
46260 return dispatch_violation ();
46262 case FITS_DISPATCH_WINDOW:
46263 return fits_dispatch_window (insn);
46266 return false;
46269 /* Implementation of the reassociation_width target hook used by the
46270 reassoc pass to identify the parallelism level in a reassociated
46271 tree. The statement's tree_code is passed in OPC. The arguments'
46272 type is passed in MODE.
46274 Currently parallel reassociation is enabled only for Atom
46275 processors, and we set the reassociation width to 2 because Atom
46276 may issue up to 2 instructions per cycle.
46278 The return value should be adjusted if parallel reassociation is
46279 enabled for other processors. */
46281 static int
46282 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46283 enum machine_mode mode)
46285 int res = 1;
46287 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46288 res = 2;
46289 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46290 res = 2;
46292 return res;
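/* Editor's note (illustrative, not part of the original source): a
   reassociation width of 2 lets the tree-reassoc pass break a long
   dependence chain into two parallel chains, e.g.

     s = a + b + c + d;                    parsed as ((a + b) + c) + d, depth 3

   may be rewritten as

     t1 = a + b;  t2 = c + d;  s = t1 + t2;                          depth 2

   which a 2-issue core such as Atom can execute with both additions of
   the first step in flight at once.  The hook only reports the width;
   the rewriting itself is done by the reassociation pass.  */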
46295 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46296 place emms and femms instructions. */
46298 static enum machine_mode
46299 ix86_preferred_simd_mode (enum machine_mode mode)
46301 if (!TARGET_SSE)
46302 return word_mode;
46304 switch (mode)
46306 case QImode:
46307 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46308 case HImode:
46309 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46310 case SImode:
46311 return TARGET_AVX512F ? V16SImode :
46312 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46313 case DImode:
46314 return TARGET_AVX512F ? V8DImode :
46315 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46317 case SFmode:
46318 if (TARGET_AVX512F)
46319 return V16SFmode;
46320 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46321 return V8SFmode;
46322 else
46323 return V4SFmode;
46325 case DFmode:
46326 if (!TARGET_VECTORIZE_DOUBLE)
46327 return word_mode;
46328 else if (TARGET_AVX512F)
46329 return V8DFmode;
46330 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46331 return V4DFmode;
46332 else if (TARGET_SSE2)
46333 return V2DFmode;
46334 /* FALLTHRU */
46336 default:
46337 return word_mode;
46341 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46342 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46343 256bit and 128bit vectors. */
46345 static unsigned int
46346 ix86_autovectorize_vector_sizes (void)
46348 return TARGET_AVX512F ? 64 | 32 | 16 :
46349 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
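/* Editor's sketch (illustrative, not part of the original source): the
   two hooks above combine roughly as follows for a single-precision
   loop such as

     void f (float *a, const float *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] += b[i];
     }

   With plain SSE the preferred mode is V4SFmode and no extra sizes are
   tried (the size bitmask is 0); with AVX (and without -mprefer-avx128)
   the preferred mode is V8SFmode and 32- and 16-byte vectors are tried;
   with AVX512F the preferred mode is V16SFmode and 64-, 32- and 16-byte
   vectors are tried, the vectorizer falling back to the smaller sizes
   when the widest one is unprofitable.  */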
46354 /* Return the class of registers which could be used for a pseudo of
46355 MODE and of class RCLASS for spilling instead of memory. Return
46356 NO_REGS if it is not possible or not profitable. */
46357 static reg_class_t
46358 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46360 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46361 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46362 && INTEGER_CLASS_P (rclass))
46363 return ALL_SSE_REGS;
46364 return NO_REGS;
46367 /* Implement targetm.vectorize.init_cost. */
46369 static void *
46370 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46372 unsigned *cost = XNEWVEC (unsigned, 3);
46373 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46374 return cost;
46377 /* Implement targetm.vectorize.add_stmt_cost. */
46379 static unsigned
46380 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46381 struct _stmt_vec_info *stmt_info, int misalign,
46382 enum vect_cost_model_location where)
46384 unsigned *cost = (unsigned *) data;
46385 unsigned retval = 0;
46387 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46388 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46390 /* Statements in an inner loop relative to the loop being
46391 vectorized are weighted more heavily. The value here is
46392 arbitrary and could potentially be improved with analysis. */
46393 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46394 count *= 50; /* FIXME. */
46396 retval = (unsigned) (count * stmt_cost);
46397 cost[where] += retval;
46399 return retval;
46402 /* Implement targetm.vectorize.finish_cost. */
46404 static void
46405 ix86_finish_cost (void *data, unsigned *prologue_cost,
46406 unsigned *body_cost, unsigned *epilogue_cost)
46408 unsigned *cost = (unsigned *) data;
46409 *prologue_cost = cost[vect_prologue];
46410 *body_cost = cost[vect_body];
46411 *epilogue_cost = cost[vect_epilogue];
46414 /* Implement targetm.vectorize.destroy_cost_data. */
46416 static void
46417 ix86_destroy_cost_data (void *data)
46419 free (data);
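/* Editor's sketch (illustrative, not part of the original source): the
   four hooks above form the vectorizer's cost-model lifecycle.  A
   hypothetical caller would use them roughly like this:

     void *data = targetm.vectorize.init_cost (loop);
     targetm.vectorize.add_stmt_cost (data, 1, vector_stmt, stmt_info,
				      0, vect_body);
     targetm.vectorize.add_stmt_cost (data, 2, vector_load, stmt_info,
				      0, vect_prologue);
     unsigned pro, body, epi;
     targetm.vectorize.finish_cost (data, &pro, &body, &epi);
     targetm.vectorize.destroy_cost_data (data);

   Note the 50x weighting in ix86_add_stmt_cost: a body statement inside
   an inner loop with count 1 and per-statement cost 4 contributes 200
   to the body cost rather than 4.  */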
46422 /* Validate target specific memory model bits in VAL. */
46424 static unsigned HOST_WIDE_INT
46425 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46427 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46428 bool strong;
46430 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46431 |MEMMODEL_MASK)
46432 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46434 warning (OPT_Winvalid_memory_model,
46435 "Unknown architecture specific memory model");
46436 return MEMMODEL_SEQ_CST;
46438 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46439 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46441 warning (OPT_Winvalid_memory_model,
46442 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46443 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46445 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46447 warning (OPT_Winvalid_memory_model,
46448 "HLE_RELEASE not used with RELEASE or stronger memory model");
46449 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46451 return val;
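/* Editor's sketch (illustrative, not part of the original source): the
   bits validated above are the ones user code can OR into a C11 memory
   model when HLE is enabled, e.g. an elided spinlock:

     static int lock;

     void acquire (void)
     {
       while (__atomic_exchange_n (&lock, 1,
				   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
	 __builtin_ia32_pause ();
     }

     void release (void)
     {
       __atomic_store_n (&lock, 0,
			 __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
     }

   Pairing IX86_HLE_ACQUIRE with a model weaker than ACQUIRE, or
   IX86_HLE_RELEASE with one weaker than RELEASE, triggers the warnings
   above and the model is forced to SEQ_CST (keeping the HLE bit).  */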
46454 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46455 CLONEI->vecsize_float and, if CLONEI->simdlen is 0, also
46456 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46457 or the number of vecsize_mangle variants that should be emitted. */
46459 static int
46460 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46461 struct cgraph_simd_clone *clonei,
46462 tree base_type, int num)
46464 int ret = 1;
46466 if (clonei->simdlen
46467 && (clonei->simdlen < 2
46468 || clonei->simdlen > 16
46469 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46471 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46472 "unsupported simdlen %d", clonei->simdlen);
46473 return 0;
46476 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46477 if (TREE_CODE (ret_type) != VOID_TYPE)
46478 switch (TYPE_MODE (ret_type))
46480 case QImode:
46481 case HImode:
46482 case SImode:
46483 case DImode:
46484 case SFmode:
46485 case DFmode:
46486 /* case SCmode: */
46487 /* case DCmode: */
46488 break;
46489 default:
46490 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46491 "unsupported return type %qT for simd\n", ret_type);
46492 return 0;
46495 tree t;
46496 int i;
46498 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46499 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46500 switch (TYPE_MODE (TREE_TYPE (t)))
46502 case QImode:
46503 case HImode:
46504 case SImode:
46505 case DImode:
46506 case SFmode:
46507 case DFmode:
46508 /* case SCmode: */
46509 /* case DCmode: */
46510 break;
46511 default:
46512 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46513 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46514 return 0;
46517 if (clonei->cilk_elemental)
46519 /* Parse the processor clause here. If it is not present, default to 'b'. */
46520 clonei->vecsize_mangle = 'b';
46522 else if (!TREE_PUBLIC (node->decl))
46524 /* If the function isn't exported, we can pick up just one ISA
46525 for the clones. */
46526 if (TARGET_AVX2)
46527 clonei->vecsize_mangle = 'd';
46528 else if (TARGET_AVX)
46529 clonei->vecsize_mangle = 'c';
46530 else
46531 clonei->vecsize_mangle = 'b';
46532 ret = 1;
46534 else
46536 clonei->vecsize_mangle = "bcd"[num];
46537 ret = 3;
46539 switch (clonei->vecsize_mangle)
46541 case 'b':
46542 clonei->vecsize_int = 128;
46543 clonei->vecsize_float = 128;
46544 break;
46545 case 'c':
46546 clonei->vecsize_int = 128;
46547 clonei->vecsize_float = 256;
46548 break;
46549 case 'd':
46550 clonei->vecsize_int = 256;
46551 clonei->vecsize_float = 256;
46552 break;
46554 if (clonei->simdlen == 0)
46556 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46557 clonei->simdlen = clonei->vecsize_int;
46558 else
46559 clonei->simdlen = clonei->vecsize_float;
46560 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46561 if (clonei->simdlen > 16)
46562 clonei->simdlen = 16;
46564 return ret;
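/* Editor's sketch (illustrative, not part of the original source): for
   an exported elemental function such as

     #pragma omp declare simd
     float scale (float x, float y) { return x * y; }

   the hook above requests three variants, mangled 'b', 'c' and 'd'.
   With a float base type the default simdlen is vecsize_float divided
   by the bit size of the type, i.e. 128 / 32 = 4 lanes for 'b' and
   256 / 32 = 8 lanes for 'c' and 'd'.  A non-exported function gets
   only the single best variant for the current ISA.  */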
46567 /* Add target attribute to SIMD clone NODE if needed. */
46569 static void
46570 ix86_simd_clone_adjust (struct cgraph_node *node)
46572 const char *str = NULL;
46573 gcc_assert (node->decl == cfun->decl);
46574 switch (node->simdclone->vecsize_mangle)
46576 case 'b':
46577 if (!TARGET_SSE2)
46578 str = "sse2";
46579 break;
46580 case 'c':
46581 if (!TARGET_AVX)
46582 str = "avx";
46583 break;
46584 case 'd':
46585 if (!TARGET_AVX2)
46586 str = "avx2";
46587 break;
46588 default:
46589 gcc_unreachable ();
46591 if (str == NULL)
46592 return;
46593 push_cfun (NULL);
46594 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46595 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46596 gcc_assert (ok);
46597 pop_cfun ();
46598 ix86_previous_fndecl = NULL_TREE;
46599 ix86_set_current_function (node->decl);
46602 /* If SIMD clone NODE can't be used in a vectorized loop
46603 in the current function, return -1; otherwise return the badness of
46604 using it (0 if it is the most desirable from the vecsize_mangle point
46605 of view, 1 slightly less desirable, etc.). */
46607 static int
46608 ix86_simd_clone_usable (struct cgraph_node *node)
46610 switch (node->simdclone->vecsize_mangle)
46612 case 'b':
46613 if (!TARGET_SSE2)
46614 return -1;
46615 if (!TARGET_AVX)
46616 return 0;
46617 return TARGET_AVX2 ? 2 : 1;
46618 case 'c':
46619 if (!TARGET_AVX)
46620 return -1;
46621 return TARGET_AVX2 ? 1 : 0;
46622 break;
46623 case 'd':
46624 if (!TARGET_AVX2)
46625 return -1;
46626 return 0;
46627 default:
46628 gcc_unreachable ();
46632 /* This function counts the number of memory references in the loop.
46633 This value determines the unrolling factor for the
46634 bdver3 and bdver4 architectures. */
46636 static int
46637 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46639 if (*x != NULL_RTX && MEM_P (*x))
46641 enum machine_mode mode;
46642 unsigned int n_words;
46644 mode = GET_MODE (*x);
46645 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46647 if (n_words > 4)
46648 (*mem_count)+=2;
46649 else
46650 (*mem_count)+=1;
46652 return 0;
46655 /* This function adjusts the unroll factor based on
46656 the hardware capabilities. For example, bdver3 has
46657 a loop buffer which makes unrolling of smaller
46658 loops less important. This function decides the
46659 unroll factor using the number of memory references
46660 (the value 32 is used) as a heuristic. */
46662 static unsigned
46663 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46665 basic_block *bbs;
46666 rtx insn;
46667 unsigned i;
46668 unsigned mem_count = 0;
46670 if (!TARGET_ADJUST_UNROLL)
46671 return nunroll;
46673 /* Count the number of memory references within the loop body. */
46674 bbs = get_loop_body (loop);
46675 for (i = 0; i < loop->num_nodes; i++)
46677 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46678 if (NONDEBUG_INSN_P (insn))
46679 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46681 free (bbs);
46683 if (mem_count && mem_count <= 32)
46684 return 32 / mem_count;
46686 return nunroll;
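/* Editor's worked example (illustrative, not part of the original
   source): for a loop body with two word-sized memory references, e.g.

     for (i = 0; i < n; i++)
       sum += a[i] * b[i];

   mem_count is 2 and the hook returns 32 / 2 = 16 as the adjusted
   unroll factor.  References wider than four words are counted twice
   by ix86_loop_memcount, and loops with more than 32 memory references
   keep the NUNROLL proposed by the generic unroller.  */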
46690 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46692 static bool
46693 ix86_float_exceptions_rounding_supported_p (void)
46695 /* For x87 floating point with standard excess precision handling,
46696 there is no adddf3 pattern (since x87 floating point only has
46697 XFmode operations) so the default hook implementation gets this
46698 wrong. */
46699 return TARGET_80387 || TARGET_SSE_MATH;
46702 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46704 static void
46705 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46707 if (!TARGET_80387 && !TARGET_SSE_MATH)
46708 return;
46709 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46710 if (TARGET_80387)
46712 tree fenv_index_type = build_index_type (size_int (6));
46713 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46714 tree fenv_var = create_tmp_var (fenv_type, NULL);
46715 mark_addressable (fenv_var);
46716 tree fenv_ptr = build_pointer_type (fenv_type);
46717 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46718 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46719 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46720 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46721 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46722 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46723 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46724 tree hold_fnclex = build_call_expr (fnclex, 0);
46725 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46726 hold_fnclex);
46727 *clear = build_call_expr (fnclex, 0);
46728 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46729 mark_addressable (sw_var);
46730 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46731 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46732 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46733 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46734 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46735 exceptions_var, exceptions_x87);
46736 *update = build2 (COMPOUND_EXPR, integer_type_node,
46737 fnstsw_call, update_mod);
46738 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46739 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46741 if (TARGET_SSE_MATH)
46743 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46744 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46745 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46746 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46747 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46748 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46749 mxcsr_orig_var, stmxcsr_hold_call);
46750 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46751 mxcsr_orig_var,
46752 build_int_cst (unsigned_type_node, 0x1f80));
46753 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46754 build_int_cst (unsigned_type_node, 0xffffffc0));
46755 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46756 mxcsr_mod_var, hold_mod_val);
46757 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46758 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46759 hold_assign_orig, hold_assign_mod);
46760 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46761 ldmxcsr_hold_call);
46762 if (*hold)
46763 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46764 else
46765 *hold = hold_all;
46766 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46767 if (*clear)
46768 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46769 ldmxcsr_clear_call);
46770 else
46771 *clear = ldmxcsr_clear_call;
46772 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46773 tree exceptions_sse = fold_convert (integer_type_node,
46774 stxmcsr_update_call);
46775 if (*update)
46777 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46778 exceptions_var, exceptions_sse);
46779 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46780 exceptions_var, exceptions_mod);
46781 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46782 exceptions_assign);
46784 else
46785 *update = build2 (MODIFY_EXPR, integer_type_node,
46786 exceptions_var, exceptions_sse);
46787 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46788 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46789 ldmxcsr_update_call);
46791 tree atomic_feraiseexcept
46792 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46793 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46794 1, exceptions_var);
46795 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46796 atomic_feraiseexcept_call);
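/* Editor's sketch (illustrative, not part of the original source): when
   both x87 and SSE math are enabled, the HOLD/CLEAR/UPDATE trees built
   above correspond roughly to the following sequence around an atomic
   compound assignment (builtin names as used in this file):

     // HOLD: save both environments and mask/clear exceptions.
     __builtin_ia32_fnstenv (&fenv);  __builtin_ia32_fnclex ();
     mxcsr_orig = __builtin_ia32_stmxcsr ();
     mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
     __builtin_ia32_ldmxcsr (mxcsr_mod);

     // CLEAR: drop exceptions raised by a failed compare-and-swap.
     __builtin_ia32_fnclex ();
     __builtin_ia32_ldmxcsr (mxcsr_mod);

     // UPDATE: collect raised exceptions, restore the environments,
     // then re-raise the collected exceptions.
     __builtin_ia32_fnstsw (&sw);  exceptions = sw;
     __builtin_ia32_fldenv (&fenv);
     exceptions |= __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (mxcsr_orig);
     __atomic_feraiseexcept (exceptions);  */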
46799 /* Initialize the GCC target structure. */
46800 #undef TARGET_RETURN_IN_MEMORY
46801 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46803 #undef TARGET_LEGITIMIZE_ADDRESS
46804 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46806 #undef TARGET_ATTRIBUTE_TABLE
46807 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46808 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46809 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46810 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46811 # undef TARGET_MERGE_DECL_ATTRIBUTES
46812 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46813 #endif
46815 #undef TARGET_COMP_TYPE_ATTRIBUTES
46816 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46818 #undef TARGET_INIT_BUILTINS
46819 #define TARGET_INIT_BUILTINS ix86_init_builtins
46820 #undef TARGET_BUILTIN_DECL
46821 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46822 #undef TARGET_EXPAND_BUILTIN
46823 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46825 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46826 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46827 ix86_builtin_vectorized_function
46829 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46830 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46832 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46833 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46835 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46836 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46838 #undef TARGET_BUILTIN_RECIPROCAL
46839 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46841 #undef TARGET_ASM_FUNCTION_EPILOGUE
46842 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46844 #undef TARGET_ENCODE_SECTION_INFO
46845 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46846 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46847 #else
46848 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46849 #endif
46851 #undef TARGET_ASM_OPEN_PAREN
46852 #define TARGET_ASM_OPEN_PAREN ""
46853 #undef TARGET_ASM_CLOSE_PAREN
46854 #define TARGET_ASM_CLOSE_PAREN ""
46856 #undef TARGET_ASM_BYTE_OP
46857 #define TARGET_ASM_BYTE_OP ASM_BYTE
46859 #undef TARGET_ASM_ALIGNED_HI_OP
46860 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46861 #undef TARGET_ASM_ALIGNED_SI_OP
46862 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46863 #ifdef ASM_QUAD
46864 #undef TARGET_ASM_ALIGNED_DI_OP
46865 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46866 #endif
46868 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46869 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46871 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46872 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46874 #undef TARGET_ASM_UNALIGNED_HI_OP
46875 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46876 #undef TARGET_ASM_UNALIGNED_SI_OP
46877 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46878 #undef TARGET_ASM_UNALIGNED_DI_OP
46879 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46881 #undef TARGET_PRINT_OPERAND
46882 #define TARGET_PRINT_OPERAND ix86_print_operand
46883 #undef TARGET_PRINT_OPERAND_ADDRESS
46884 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46885 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46886 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46887 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46888 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46890 #undef TARGET_SCHED_INIT_GLOBAL
46891 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46892 #undef TARGET_SCHED_ADJUST_COST
46893 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46894 #undef TARGET_SCHED_ISSUE_RATE
46895 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46896 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46897 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46898 ia32_multipass_dfa_lookahead
46899 #undef TARGET_SCHED_MACRO_FUSION_P
46900 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46901 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46902 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46904 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46905 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46907 #undef TARGET_MEMMODEL_CHECK
46908 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46910 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46911 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46913 #ifdef HAVE_AS_TLS
46914 #undef TARGET_HAVE_TLS
46915 #define TARGET_HAVE_TLS true
46916 #endif
46917 #undef TARGET_CANNOT_FORCE_CONST_MEM
46918 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46919 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46920 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46922 #undef TARGET_DELEGITIMIZE_ADDRESS
46923 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46925 #undef TARGET_MS_BITFIELD_LAYOUT_P
46926 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46928 #if TARGET_MACHO
46929 #undef TARGET_BINDS_LOCAL_P
46930 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46931 #endif
46932 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46933 #undef TARGET_BINDS_LOCAL_P
46934 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46935 #endif
46937 #undef TARGET_ASM_OUTPUT_MI_THUNK
46938 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46939 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46940 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46942 #undef TARGET_ASM_FILE_START
46943 #define TARGET_ASM_FILE_START x86_file_start
46945 #undef TARGET_OPTION_OVERRIDE
46946 #define TARGET_OPTION_OVERRIDE ix86_option_override
46948 #undef TARGET_REGISTER_MOVE_COST
46949 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46950 #undef TARGET_MEMORY_MOVE_COST
46951 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46952 #undef TARGET_RTX_COSTS
46953 #define TARGET_RTX_COSTS ix86_rtx_costs
46954 #undef TARGET_ADDRESS_COST
46955 #define TARGET_ADDRESS_COST ix86_address_cost
46957 #undef TARGET_FIXED_CONDITION_CODE_REGS
46958 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46959 #undef TARGET_CC_MODES_COMPATIBLE
46960 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46962 #undef TARGET_MACHINE_DEPENDENT_REORG
46963 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46965 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46966 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46968 #undef TARGET_BUILD_BUILTIN_VA_LIST
46969 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46971 #undef TARGET_FOLD_BUILTIN
46972 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46974 #undef TARGET_COMPARE_VERSION_PRIORITY
46975 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46977 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46978 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46979 ix86_generate_version_dispatcher_body
46981 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46982 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46983 ix86_get_function_versions_dispatcher
46985 #undef TARGET_ENUM_VA_LIST_P
46986 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46988 #undef TARGET_FN_ABI_VA_LIST
46989 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
46991 #undef TARGET_CANONICAL_VA_LIST_TYPE
46992 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
46994 #undef TARGET_EXPAND_BUILTIN_VA_START
46995 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
46997 #undef TARGET_MD_ASM_CLOBBERS
46998 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47000 #undef TARGET_PROMOTE_PROTOTYPES
47001 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47002 #undef TARGET_SETUP_INCOMING_VARARGS
47003 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47004 #undef TARGET_MUST_PASS_IN_STACK
47005 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47006 #undef TARGET_FUNCTION_ARG_ADVANCE
47007 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47008 #undef TARGET_FUNCTION_ARG
47009 #define TARGET_FUNCTION_ARG ix86_function_arg
47010 #undef TARGET_FUNCTION_ARG_BOUNDARY
47011 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47012 #undef TARGET_PASS_BY_REFERENCE
47013 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47014 #undef TARGET_INTERNAL_ARG_POINTER
47015 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47016 #undef TARGET_UPDATE_STACK_BOUNDARY
47017 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47018 #undef TARGET_GET_DRAP_RTX
47019 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47020 #undef TARGET_STRICT_ARGUMENT_NAMING
47021 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47022 #undef TARGET_STATIC_CHAIN
47023 #define TARGET_STATIC_CHAIN ix86_static_chain
47024 #undef TARGET_TRAMPOLINE_INIT
47025 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47026 #undef TARGET_RETURN_POPS_ARGS
47027 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47029 #undef TARGET_LEGITIMATE_COMBINED_INSN
47030 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47032 #undef TARGET_ASAN_SHADOW_OFFSET
47033 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47035 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47036 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47038 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47039 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47041 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47042 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47044 #undef TARGET_C_MODE_FOR_SUFFIX
47045 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47047 #ifdef HAVE_AS_TLS
47048 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47049 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47050 #endif
47052 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47053 #undef TARGET_INSERT_ATTRIBUTES
47054 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47055 #endif
47057 #undef TARGET_MANGLE_TYPE
47058 #define TARGET_MANGLE_TYPE ix86_mangle_type
47060 #if !TARGET_MACHO
47061 #undef TARGET_STACK_PROTECT_FAIL
47062 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47063 #endif
47065 #undef TARGET_FUNCTION_VALUE
47066 #define TARGET_FUNCTION_VALUE ix86_function_value
47068 #undef TARGET_FUNCTION_VALUE_REGNO_P
47069 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47071 #undef TARGET_PROMOTE_FUNCTION_MODE
47072 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47074 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47075 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47077 #undef TARGET_INSTANTIATE_DECLS
47078 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47080 #undef TARGET_SECONDARY_RELOAD
47081 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47083 #undef TARGET_CLASS_MAX_NREGS
47084 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47086 #undef TARGET_PREFERRED_RELOAD_CLASS
47087 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47088 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47089 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47090 #undef TARGET_CLASS_LIKELY_SPILLED_P
47091 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47093 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47094 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47095 ix86_builtin_vectorization_cost
47096 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47097 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47098 ix86_vectorize_vec_perm_const_ok
47099 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47100 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47101 ix86_preferred_simd_mode
47102 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47103 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47104 ix86_autovectorize_vector_sizes
47105 #undef TARGET_VECTORIZE_INIT_COST
47106 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47107 #undef TARGET_VECTORIZE_ADD_STMT_COST
47108 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47109 #undef TARGET_VECTORIZE_FINISH_COST
47110 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47111 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47112 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47114 #undef TARGET_SET_CURRENT_FUNCTION
47115 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47117 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47118 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47120 #undef TARGET_OPTION_SAVE
47121 #define TARGET_OPTION_SAVE ix86_function_specific_save
47123 #undef TARGET_OPTION_RESTORE
47124 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47126 #undef TARGET_OPTION_PRINT
47127 #define TARGET_OPTION_PRINT ix86_function_specific_print
47129 #undef TARGET_OPTION_FUNCTION_VERSIONS
47130 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47132 #undef TARGET_CAN_INLINE_P
47133 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47135 #undef TARGET_EXPAND_TO_RTL_HOOK
47136 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47138 #undef TARGET_LEGITIMATE_ADDRESS_P
47139 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47141 #undef TARGET_LRA_P
47142 #define TARGET_LRA_P hook_bool_void_true
47144 #undef TARGET_REGISTER_PRIORITY
47145 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47147 #undef TARGET_REGISTER_USAGE_LEVELING_P
47148 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47150 #undef TARGET_LEGITIMATE_CONSTANT_P
47151 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47153 #undef TARGET_FRAME_POINTER_REQUIRED
47154 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47156 #undef TARGET_CAN_ELIMINATE
47157 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47159 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47160 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47162 #undef TARGET_ASM_CODE_END
47163 #define TARGET_ASM_CODE_END ix86_code_end
47165 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47166 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47168 #if TARGET_MACHO
47169 #undef TARGET_INIT_LIBFUNCS
47170 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47171 #endif
47173 #undef TARGET_LOOP_UNROLL_ADJUST
47174 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47176 #undef TARGET_SPILL_CLASS
47177 #define TARGET_SPILL_CLASS ix86_spill_class
47179 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47180 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47181 ix86_simd_clone_compute_vecsize_and_simdlen
47183 #undef TARGET_SIMD_CLONE_ADJUST
47184 #define TARGET_SIMD_CLONE_ADJUST \
47185 ix86_simd_clone_adjust
47187 #undef TARGET_SIMD_CLONE_USABLE
47188 #define TARGET_SIMD_CLONE_USABLE \
47189 ix86_simd_clone_usable
47191 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47192 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47193 ix86_float_exceptions_rounding_supported_p
47195 struct gcc_target targetm = TARGET_INITIALIZER;
47197 #include "gt-i386.h"