1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
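/* Illustrative sketch, not part of the original file: MODE_INDEX picks the
   per-mode slot of the five-element mult_init[] and divide[] arrays in
   struct processor_costs (field names as declared in i386.h).  A multiply
   cost lookup is therefore assumed to look roughly like

     cost->mult_init[MODE_INDEX (mode)] + nbits * cost->mult_bit

   where `cost' points at one of the tables below and `nbits' counts the
   set bits of a constant multiplier.  */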
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
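/* Illustrative note, not part of the original file: with COSTS_N_INSNS (N)
   equal to N*4, the size-tuned add cost below, COSTS_N_BYTES (2) == 4,
   equals COSTS_N_INSNS (1), so byte counts and the default insn-count
   scale stay directly comparable when optimizing for size.  */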
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
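/* Illustrative sketch, not part of the original file: each stringop_algs
   initializer gives the algorithm for unknown/variable block sizes plus a
   table of {max_size, algorithm, noalign} entries scanned in order, with
   max_size == -1 acting as a catch-all for larger blocks.  The two-element
   arrays keep the 32-bit tuning in slot 0 and the 64-bit tuning in slot 1
   (DUMMY_STRINGOP_ALGS marks tunings without a separate 64-bit table), so
   a lookup is assumed to look something like

     const stringop_algs *algs = &cost->memcpy[TARGET_64BIT != 0];

   mirroring how decide_alg later in this file chooses a strategy.  */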
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
180 };
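/* Illustrative note, not part of the original file: these tables are not
   read directly; they are reached through the ix86_cost and ix86_tune_cost
   pointers defined later in this file, with something like
   `ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;' in
   ix86_option_override_internal (exact form assumed) selecting this
   size-tuned table when optimizing for size.  */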
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
256 };
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
333 };
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
408 };
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411 (we ensure the alignment). For small blocks inline loop is still a
412 noticeable win, for bigger blocks either rep movsl or rep movsb is
413 way to go. Rep movsb has apparently more expensive startup time in CPU,
414 but after 4K the difference is down in the noise. */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
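/* Illustrative reading of the tables above, not part of the original file:
   for a memcpy of known size in 32-bit PentiumPro-tuned code the first
   matching entry wins, i.e. up to 128 bytes an inline loop, up to 1024
   bytes an unrolled loop, up to 8192 bytes rep movsl (rep_prefix_4_byte)
   and anything larger rep movsb (rep_prefix_1_byte); unknown sizes fall
   back to rep_prefix_4_byte, as described in the comment above.  */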
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
491 };
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
566 };
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
722 /* K8 has optimized REP instruction for medium sized blocks, but for very
723 small blocks it is better to use loop. For large blocks, libcall can
724 do nontemporal accesses and beat inline considerably. */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
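/* Illustrative reading of k8_memcpy above, not part of the original file:
   K8 is the first tuning in this file with a real 64-bit (second) slot, so
   for 64-bit code a copy of up to 16 bytes uses an inline loop, up to 8192
   bytes rep movsq (rep_prefix_8_byte), and anything larger goes to the
   memcpy libcall, which can use nontemporal stores as the comment before
   k8_memcpy notes.  */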
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 /* New AMD processors never drop prefetches; if they cannot be performed
781 immediately, they are queued. We set number of simultaneous prefetches
782 to a large constant to reflect this (it probably is not a good idea not
783 to limit number of prefetches at all, as their execution also takes some
784 time). */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
807 };
809 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
810 very small blocks it is better to use loop. For large blocks, libcall can
811 do nontemporal accesses and beat inline considerably. */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued. We set number of simultaneous prefetches
876 to a large constant to reflect this (it probably is not a good idea not
877 to limit number of prefetches at all, as their execution also takes some
878 time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
901 };
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporal accesses and beat inline considerably. */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969 /* New AMD processors never drop prefetches; if they cannot be performed
970 immediately, they are queued. We set number of simultaneous prefetches
971 to a large constant to reflect this (it probably is not a good idea not
972 to limit number of prefetches at all, as their execution also takes some
973 time). */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
998 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
999 very small blocks it is better to use loop. For large blocks, libcall
1000 can do nontemporal accesses and beat inline considerably. */
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065 /* New AMD processors never drop prefetches; if they cannot be performed
1066 immediately, they are queued. We set number of simultaneous prefetches
1067 to a large constant to reflect this (it probably is not a good idea not
1068 to limit number of prefetches at all, as their execution also takes some
1069 time). */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
1092 };
1095 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1096 very small blocks it is better to use loop. For large blocks, libcall
1097 can do nontemporal accesses and beat inline considerably. */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152 /* New AMD processors never drop prefetches; if they cannot be performed
1153 immediately, they are queued. We set number of simultaneous prefetches
1154 to a large constant to reflect this (it probably is not a good idea not
1155 to limit number of prefetches at all, as their execution also takes some
1156 time). */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
1179 };
1181 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1182 very small blocks it is better to use loop. For large blocks, libcall
1183 can do nontemporal accesses and beat inline considerably. */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 /* New AMD processors never drop prefetches; if they cannot be performed
1239 immediately, they are queued. We set number of simultaneous prefetches
1240 to a large constant to reflect this (it probably is not a good idea not
1241 to limit number of prefetches at all, as their execution also takes some
1242 time). */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
1265 };
1267 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1268 very small blocks it is better to use loop. For large blocks, libcall can
1269 do nontemporal accesses and beat inline considerably. */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
1354 };
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
};
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
};
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
};
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
};
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
};
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
};
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
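/* Reading these stringop tables: element [0] is used for 32-bit code and
   element [1] for 64-bit code (ix86_parse_stringop_strategy_string below
   indexes them by TARGET_64BIT != 0).  Within a table, the leading element
   (e.g. libcall above) is the algorithm used when the block size is not
   known at compile time, and each {max, alg, noalign} entry selects ALG for
   known sizes up to MAX bytes; the entry with max == -1 covers all larger
   sizes.  */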
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1844 this cost, however, our current implementation of synth_mult results in
1845 the use of unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
};
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1931 this cost, however, our current implementation of synth_mult results in
1932 the use of unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME: perhaps a more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
};
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
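/* Illustrative sketch (not an actual entry from x86-tune.def): a tuning
   feature whose selector is (m_CORE_ALL | m_GENERIC) is enabled whenever
   ix86_tune names one of the Core processors or the generic model, because
   set_ix86_tune_features below tests each selector against the mask
   1u << ix86_tune.  */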
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
};
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
};
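/* A minimal sketch of the X-macro pattern used above (the real entries live
   in x86-tune.def; the feature named here is hypothetical):
     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature", m_GENERIC)
   contributes the string "example_feature" to ix86_tune_feature_names and
   the selector m_GENERIC to initial_ix86_tune_features.  */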
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
};
2084 /* In case the average insn count for a single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2089 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 /* The "default" register map used in 32bit mode. */
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2148 /* The "default" register map used in 64bit mode. */
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
*/
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
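/* For example, %st(0) is gcc regno 8 and, per the numbering described above,
   DWARF regno 11, so svr4_dbx_register_map[8] == 11; %eax (gcc regno 0)
   keeps DWARF regno 0.  */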
2232 /* Define parameter passing and return registers. */
2234 static int const x86_64_int_parameter_registers[6] =
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2241 CX_REG, DX_REG, R8_REG, R9_REG
2244 static int const x86_64_int_return_registers[4] =
2246 AX_REG, DX_REG, DI_REG, SI_REG
2249 /* Additional registers that are clobbered by SYSV calls: call-saved under the MS ABI but call-clobbered under the SysV ABI. */
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2259 /* Define the structure for the machine field in struct function. */
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2275 saved static chain if ix86_static_chain_on_stack
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2289 [frame] |
2291 [padding2] | = to_allocate
2292 <- STACK_POINTER
*/
2294 struct ix86_frame
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2347 /* Alignment for incoming stack boundary in bits specified at
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2357 /* Calling abi specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2368 /* Register class used for passing a given 64-bit part of the argument.
2369 These represent classes as documented by the psABI, with the exception
2370 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2371 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (the upper half does contain padding). */
2375 enum x86_64_reg_class
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2390 #define MAX_CLASSES 8
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2413 enum ix86_function_specific_strings
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2437 static enum calling_abi ix86_function_abi (const_tree);
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2444 /* Whether -mtune= or -march= were specified */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2496 static bool
2497 gate_insert_vzeroupper (void)
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2505 int i;
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256-bit registers. The pass reuses
2509 the mode switching infrastructure by re-running the mode insertion
2510 pass, so disable entities that have already been processed.
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2521 namespace {
2523 const pass_data pass_data_insert_vzeroupper =
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2549 }; // class pass_insert_vzeroupper
2551 } // anon namespace
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2556 return new pass_insert_vzeroupper (ctxt);
2559 /* Return true if a red-zone is in use. */
2561 static inline bool
2562 ix86_using_red_zone (void)
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
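/* A sketch of the output, using masks that appear in the tables below:
     ix86_target_string (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
                         | OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_64BIT
                         | OPTION_MASK_ABI_64,
                         0, "core2", NULL, FPMATH_SSE, false)
   would return "-march=core2 -m64 -msse2 -msse -mmmx -mfpmath=sse"
   (illustrative only; option order follows the isa_opts table).  */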
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options match those first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2625 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2628 /* Flag options. */
2629 static struct ix86_target_opts flag_opts[] =
2631 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2632 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2633 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2634 { "-m80387", MASK_80387 },
2635 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2636 { "-malign-double", MASK_ALIGN_DOUBLE },
2637 { "-mcld", MASK_CLD },
2638 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2639 { "-mieee-fp", MASK_IEEE_FP },
2640 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2641 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2642 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2643 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2644 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2645 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2646 { "-mno-red-zone", MASK_NO_RED_ZONE },
2647 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2648 { "-mrecip", MASK_RECIP },
2649 { "-mrtd", MASK_RTD },
2650 { "-msseregparm", MASK_SSEREGPARM },
2651 { "-mstack-arg-probe", MASK_STACK_PROBE },
2652 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2653 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2654 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2655 { "-mvzeroupper", MASK_VZEROUPPER },
2656 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2657 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2658 { "-mprefer-avx128", MASK_PREFER_AVX128},
2661 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2663 char isa_other[40];
2664 char target_other[40];
2665 unsigned num = 0;
2666 unsigned i, j;
2667 char *ret;
2668 char *ptr;
2669 size_t len;
2670 size_t line_len;
2671 size_t sep_len;
2672 const char *abi;
2674 memset (opts, '\0', sizeof (opts));
2676 /* Add -march= option. */
2677 if (arch)
2679 opts[num][0] = "-march=";
2680 opts[num++][1] = arch;
2683 /* Add -mtune= option. */
2684 if (tune)
2686 opts[num][0] = "-mtune=";
2687 opts[num++][1] = tune;
2690 /* Add -m32/-m64/-mx32. */
2691 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2693 if ((isa & OPTION_MASK_ABI_64) != 0)
2694 abi = "-m64";
2695 else
2696 abi = "-mx32";
2697 isa &= ~ (OPTION_MASK_ISA_64BIT
2698 | OPTION_MASK_ABI_64
2699 | OPTION_MASK_ABI_X32);
2701 else
2702 abi = "-m32";
2703 opts[num++][0] = abi;
2705 /* Pick out the options in isa options. */
2706 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2708 if ((isa & isa_opts[i].mask) != 0)
2710 opts[num++][0] = isa_opts[i].option;
2711 isa &= ~ isa_opts[i].mask;
2715 if (isa && add_nl_p)
2717 opts[num++][0] = isa_other;
2718 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2719 isa);
2722 /* Add flag options. */
2723 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2725 if ((flags & flag_opts[i].mask) != 0)
2727 opts[num++][0] = flag_opts[i].option;
2728 flags &= ~ flag_opts[i].mask;
2732 if (flags && add_nl_p)
2734 opts[num++][0] = target_other;
2735 sprintf (target_other, "(other flags: %#x)", flags);
2738 /* Add -fpmath= option. */
2739 if (fpmath)
2741 opts[num][0] = "-mfpmath=";
2742 switch ((int) fpmath)
2744 case FPMATH_387:
2745 opts[num++][1] = "387";
2746 break;
2748 case FPMATH_SSE:
2749 opts[num++][1] = "sse";
2750 break;
2752 case FPMATH_387 | FPMATH_SSE:
2753 opts[num++][1] = "sse+387";
2754 break;
2756 default:
2757 gcc_unreachable ();
2761 /* Any options? */
2762 if (num == 0)
2763 return NULL;
2765 gcc_assert (num < ARRAY_SIZE (opts));
2767 /* Size the string. */
2768 len = 0;
2769 sep_len = (add_nl_p) ? 3 : 1;
2770 for (i = 0; i < num; i++)
2772 len += sep_len;
2773 for (j = 0; j < 2; j++)
2774 if (opts[i][j])
2775 len += strlen (opts[i][j]);
2778 /* Build the string. */
2779 ret = ptr = (char *) xmalloc (len);
2780 line_len = 0;
2782 for (i = 0; i < num; i++)
2784 size_t len2[2];
2786 for (j = 0; j < 2; j++)
2787 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2789 if (i != 0)
2791 *ptr++ = ' ';
2792 line_len++;
2794 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2796 *ptr++ = '\\';
2797 *ptr++ = '\n';
2798 line_len = 0;
2802 for (j = 0; j < 2; j++)
2803 if (opts[i][j])
2805 memcpy (ptr, opts[i][j], len2[j]);
2806 ptr += len2[j];
2807 line_len += len2[j];
2811 *ptr = '\0';
2812 gcc_assert (ret + len >= ptr);
2814 return ret;
2817 /* Return true if profiling code should be emitted before the
2818 prologue, and false otherwise. On x86 this is the case when
2819 __fentry__-based profiling (-mfentry) is in use. */
2820 static bool
2821 ix86_profile_before_prologue (void)
2823 return flag_fentry != 0;
2826 /* Function that is callable from the debugger to print the current
2827 options. */
2828 void ATTRIBUTE_UNUSED
2829 ix86_debug_options (void)
2831 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2832 ix86_arch_string, ix86_tune_string,
2833 ix86_fpmath, true);
2835 if (opts)
2837 fprintf (stderr, "%s\n\n", opts);
2838 free (opts);
2840 else
2841 fputs ("<no options>\n\n", stderr);
2843 return;
2846 static const char *stringop_alg_names[] = {
2847 #define DEF_ENUM
2848 #define DEF_ALG(alg, name) #name,
2849 #include "stringop.def"
2850 #undef DEF_ENUM
2851 #undef DEF_ALG
2854 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2855 The string is of the following form (or comma separated list of it):
2857 strategy_alg:max_size:[align|noalign]
2859 where the full size range for the strategy is either [0, max_size] or
2860 [min_size, max_size], in which min_size is the max_size + 1 of the
2861 preceding range. The last size range must have max_size == -1.
2863 Examples:
2866 -mmemcpy-strategy=libcall:-1:noalign
2868 This is equivalent (for known-size memcpy) to -mstringop-strategy=libcall.
2872 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2874 This is to tell the compiler to use the following strategy for memset
2875 1) when the expected size is between [1, 16], use rep_8byte strategy;
2876 2) when the size is between [17, 2048], use vector_loop;
2877 3) when the size is > 2048, use libcall. */
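/* A sketch of how the memset example above lands in the size table (the
   enum names come from stringop.def; "rep_8byte" is assumed to map to
   rep_prefix_8_byte); noalign is true for "noalign" and false for "align":
     size[0] = {16,   rep_prefix_8_byte, true};
     size[1] = {2048, vector_loop,       false};
     size[2] = {-1,   libcall,           true};  */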
2879 struct stringop_size_range
2881 int max;
2882 stringop_alg alg;
2883 bool noalign;
2886 static void
2887 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2889 const struct stringop_algs *default_algs;
2890 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2891 char *curr_range_str, *next_range_str;
2892 int i = 0, n = 0;
2894 if (is_memset)
2895 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2896 else
2897 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2899 curr_range_str = strategy_str;
2903 int maxs;
2904 char alg_name[128];
2905 char align[16];
2906 next_range_str = strchr (curr_range_str, ',');
2907 if (next_range_str)
2908 *next_range_str++ = '\0';
2910 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2911 alg_name, &maxs, align))
2913 error ("wrong arg %s to option %s", curr_range_str,
2914 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2915 return;
2918 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2920 error ("size ranges of option %s should be increasing",
2921 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2922 return;
2925 for (i = 0; i < last_alg; i++)
2926 if (!strcmp (alg_name, stringop_alg_names[i]))
2927 break;
2929 if (i == last_alg)
2931 error ("wrong stringop strategy name %s specified for option %s",
2932 alg_name,
2933 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2934 return;
2937 if ((stringop_alg) i == rep_prefix_8_byte
2938 && !TARGET_64BIT)
2940 /* rep; movq isn't available in 32-bit code. */
2941 error ("stringop strategy name %s specified for option %s "
2942 "not supported for 32-bit code",
2943 alg_name,
2944 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2945 return;
2948 input_ranges[n].max = maxs;
2949 input_ranges[n].alg = (stringop_alg) i;
2950 if (!strcmp (align, "align"))
2951 input_ranges[n].noalign = false;
2952 else if (!strcmp (align, "noalign"))
2953 input_ranges[n].noalign = true;
2954 else
2956 error ("unknown alignment %s specified for option %s",
2957 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2960 n++;
2961 curr_range_str = next_range_str;
2963 while (curr_range_str);
2965 if (input_ranges[n - 1].max != -1)
2967 error ("the max value for the last size range should be -1"
2968 " for option %s",
2969 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2970 return;
2973 if (n > MAX_STRINGOP_ALGS)
2975 error ("too many size ranges specified in option %s",
2976 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2977 return;
2980 /* Now override the default algs array. */
2981 for (i = 0; i < n; i++)
2983 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2984 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2985 = input_ranges[i].alg;
2986 *const_cast<int *>(&default_algs->size[i].noalign)
2987 = input_ranges[i].noalign;
2992 /* Parse the -mtune-ctrl= option. When DUMP is true,
2993 print the features that are explicitly set. */
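/* For example (the feature names here are hypothetical; real names come
   from x86-tune.def): -mtune-ctrl=example_feature,^other_feature sets the
   first feature and clears the second, since a leading '^' negates an
   entry.  */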
2995 static void
2996 parse_mtune_ctrl_str (bool dump)
2998 if (!ix86_tune_ctrl_string)
2999 return;
3001 char *next_feature_string = NULL;
3002 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3003 char *orig = curr_feature_string;
3004 int i;
3007 bool clear = false;
3009 next_feature_string = strchr (curr_feature_string, ',');
3010 if (next_feature_string)
3011 *next_feature_string++ = '\0';
3012 if (*curr_feature_string == '^')
3014 curr_feature_string++;
3015 clear = true;
3017 for (i = 0; i < X86_TUNE_LAST; i++)
3019 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3021 ix86_tune_features[i] = !clear;
3022 if (dump)
3023 fprintf (stderr, "Explicitly %s feature %s\n",
3024 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3025 break;
3028 if (i == X86_TUNE_LAST)
3029 error ("Unknown parameter to option -mtune-ctrl: %s",
3030 clear ? curr_feature_string - 1 : curr_feature_string);
3031 curr_feature_string = next_feature_string;
3033 while (curr_feature_string);
3034 free (orig);
3037 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3038 processor type. */
3040 static void
3041 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3043 unsigned int ix86_tune_mask = 1u << ix86_tune;
3044 int i;
3046 for (i = 0; i < X86_TUNE_LAST; ++i)
3048 if (ix86_tune_no_default)
3049 ix86_tune_features[i] = 0;
3050 else
3051 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3054 if (dump)
3056 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3057 for (i = 0; i < X86_TUNE_LAST; i++)
3058 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3059 ix86_tune_features[i] ? "on" : "off");
3062 parse_mtune_ctrl_str (dump);
3066 /* Override various settings based on options. If MAIN_ARGS_P, the
3067 options are from the command line, otherwise they are from
3068 attributes. */
3070 static void
3071 ix86_option_override_internal (bool main_args_p,
3072 struct gcc_options *opts,
3073 struct gcc_options *opts_set)
3075 int i;
3076 unsigned int ix86_arch_mask;
3077 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3078 const char *prefix;
3079 const char *suffix;
3080 const char *sw;
3082 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3083 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3084 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3085 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3086 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3087 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3088 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3089 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3090 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3091 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3092 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3093 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3094 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3095 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3096 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3097 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3098 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3099 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3100 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3101 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3102 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3103 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3104 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3105 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3106 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3107 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3108 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3109 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3110 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3111 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3112 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3113 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3114 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3115 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3116 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3117 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3118 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3119 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3120 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3121 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3122 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3123 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3124 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3125 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3126 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3127 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3129 #define PTA_CORE2 \
3130 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3131 | PTA_CX16 | PTA_FXSR)
3132 #define PTA_NEHALEM \
3133 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3134 #define PTA_WESTMERE \
3135 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3136 #define PTA_SANDYBRIDGE \
3137 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3138 #define PTA_IVYBRIDGE \
3139 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3140 #define PTA_HASWELL \
3141 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3142 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3143 #define PTA_BROADWELL \
3144 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3145 #define PTA_BONNELL \
3146 (PTA_CORE2 | PTA_MOVBE)
3147 #define PTA_SILVERMONT \
3148 (PTA_WESTMERE | PTA_MOVBE)
3150 /* If this reaches 64, we need to widen the struct pta flags field below. */
3152 static struct pta
3154 const char *const name; /* processor name or nickname. */
3155 const enum processor_type processor;
3156 const enum attr_cpu schedule;
3157 const unsigned HOST_WIDE_INT flags;
3159 const processor_alias_table[] =
3161 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3162 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3163 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3164 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3165 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3166 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3167 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3168 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3169 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3170 PTA_MMX | PTA_SSE | PTA_FXSR},
3171 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3172 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3173 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3174 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3175 PTA_MMX | PTA_SSE | PTA_FXSR},
3176 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 PTA_MMX | PTA_SSE | PTA_FXSR},
3178 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3179 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3180 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3181 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3182 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3183 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3184 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3185 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3186 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3187 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3188 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3189 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3190 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3191 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3192 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3193 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3194 PTA_SANDYBRIDGE},
3195 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3196 PTA_SANDYBRIDGE},
3197 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3198 PTA_IVYBRIDGE},
3199 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3200 PTA_IVYBRIDGE},
3201 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3202 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3203 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3204 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3205 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3206 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3207 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3208 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3209 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3211 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3212 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3213 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3214 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3215 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3216 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3217 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3218 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3219 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3220 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3221 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3222 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3223 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3224 {"x86-64", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3226 {"k8", PROCESSOR_K8, CPU_K8,
3227 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3228 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3229 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"opteron", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"athlon64", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3249 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3250 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3254 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3255 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3256 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3257 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3258 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3259 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3260 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3261 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3262 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3263 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3264 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3265 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3266 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3267 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3268 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3269 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3270 | PTA_XSAVEOPT | PTA_FSGSBASE},
3271 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3274 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3275 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3276 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3277 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3278 | PTA_MOVBE},
3279 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3280 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3281 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3282 | PTA_FXSR | PTA_XSAVE},
3283 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3284 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3285 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3286 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3287 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3288 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3290 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3291 PTA_64BIT
3292 | PTA_HLE /* flags are only used for -march switch. */ },
3295 /* -mrecip options. */
3296 static struct
3298 const char *string; /* option name */
3299 unsigned int mask; /* mask bits to set */
3301 const recip_options[] =
3303 { "all", RECIP_MASK_ALL },
3304 { "none", RECIP_MASK_NONE },
3305 { "div", RECIP_MASK_DIV },
3306 { "sqrt", RECIP_MASK_SQRT },
3307 { "vec-div", RECIP_MASK_VEC_DIV },
3308 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3311 int const pta_size = ARRAY_SIZE (processor_alias_table);
3313 /* Set up prefix/suffix so the error messages refer to either the command
3314 line argument, or the attribute(target). */
3315 if (main_args_p)
3317 prefix = "-m";
3318 suffix = "";
3319 sw = "switch";
3321 else
3323 prefix = "option(\"";
3324 suffix = "\")";
3325 sw = "attribute";
3328 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3329 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3330 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3331 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3332 #ifdef TARGET_BI_ARCH
3333 else
3335 #if TARGET_BI_ARCH == 1
3336 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3337 is on and OPTION_MASK_ABI_X32 is off. We turn off
3338 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3339 -mx32. */
3340 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3341 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3342 #else
3343 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3344 on and OPTION_MASK_ABI_64 is off. We turn off
3345 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3346 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3347 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3348 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3349 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3350 #endif
3352 #endif
3354 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3356 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3357 OPTION_MASK_ABI_64 for TARGET_X32. */
3358 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3359 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3361 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3362 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3363 | OPTION_MASK_ABI_X32
3364 | OPTION_MASK_ABI_64);
3365 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3367 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3368 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3369 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3370 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3373 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3374 SUBTARGET_OVERRIDE_OPTIONS;
3375 #endif
3377 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3378 SUBSUBTARGET_OVERRIDE_OPTIONS;
3379 #endif
3381 /* -fPIC is the default for x86_64. */
3382 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3383 opts->x_flag_pic = 2;
3385 /* Need to check -mtune=generic first. */
3386 if (opts->x_ix86_tune_string)
3388 /* As special support for cross compilers, we read -mtune=native
3389 as -mtune=generic. With native compilers we won't see
3390 -mtune=native, as it was changed by the driver. */
3391 if (!strcmp (opts->x_ix86_tune_string, "native"))
3393 opts->x_ix86_tune_string = "generic";
3395 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3396 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3397 "%stune=k8%s or %stune=generic%s instead as appropriate",
3398 prefix, suffix, prefix, suffix, prefix, suffix);
3400 else
3402 if (opts->x_ix86_arch_string)
3403 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3404 if (!opts->x_ix86_tune_string)
3406 opts->x_ix86_tune_string
3407 = processor_target_table[TARGET_CPU_DEFAULT].name;
3408 ix86_tune_defaulted = 1;
3411 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3412 or defaulted. We need to use a sensible tune option. */
3413 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3415 opts->x_ix86_tune_string = "generic";
3419 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3420 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3422 /* rep; movq isn't available in 32-bit code. */
3423 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3424 opts->x_ix86_stringop_alg = no_stringop;
3427 if (!opts->x_ix86_arch_string)
3428 opts->x_ix86_arch_string
3429 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3430 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3431 else
3432 ix86_arch_specified = 1;
3434 if (opts_set->x_ix86_pmode)
3436 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3437 && opts->x_ix86_pmode == PMODE_SI)
3438 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3439 && opts->x_ix86_pmode == PMODE_DI))
3440 error ("address mode %qs not supported in the %s bit mode",
3441 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3442 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3444 else
3445 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3446 ? PMODE_DI : PMODE_SI;
3448 if (!opts_set->x_ix86_abi)
3449 opts->x_ix86_abi = DEFAULT_ABI;
3451 /* For targets using the MS ABI, enable ms-extensions if not
3452 explicitly turned off. For non-MS ABI targets we turn this
3453 option off. */
3454 if (!opts_set->x_flag_ms_extensions)
3455 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3457 if (opts_set->x_ix86_cmodel)
3459 switch (opts->x_ix86_cmodel)
3461 case CM_SMALL:
3462 case CM_SMALL_PIC:
3463 if (opts->x_flag_pic)
3464 opts->x_ix86_cmodel = CM_SMALL_PIC;
3465 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3466 error ("code model %qs not supported in the %s bit mode",
3467 "small", "32");
3468 break;
3470 case CM_MEDIUM:
3471 case CM_MEDIUM_PIC:
3472 if (opts->x_flag_pic)
3473 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3474 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in the %s bit mode",
3476 "medium", "32");
3477 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3478 error ("code model %qs not supported in x32 mode",
3479 "medium");
3480 break;
3482 case CM_LARGE:
3483 case CM_LARGE_PIC:
3484 if (opts->x_flag_pic)
3485 opts->x_ix86_cmodel = CM_LARGE_PIC;
3486 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3487 error ("code model %qs not supported in the %s bit mode",
3488 "large", "32");
3489 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3490 error ("code model %qs not supported in x32 mode",
3491 "large");
3492 break;
3494 case CM_32:
3495 if (opts->x_flag_pic)
3496 error ("code model %s does not support PIC mode", "32");
3497 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3498 error ("code model %qs not supported in the %s bit mode",
3499 "32", "64");
3500 break;
3502 case CM_KERNEL:
3503 if (opts->x_flag_pic)
3505 error ("code model %s does not support PIC mode", "kernel");
3506 opts->x_ix86_cmodel = CM_32;
3508 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3509 error ("code model %qs not supported in the %s bit mode",
3510 "kernel", "32");
3511 break;
3513 default:
3514 gcc_unreachable ();
3517 else
3519 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3520 use of rip-relative addressing. This eliminates fixups that
3521 would otherwise be needed if this object is to be placed in a
3522 DLL, and is essentially just as efficient as direct addressing. */
3523 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3524 && (TARGET_RDOS || TARGET_PECOFF))
3525 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3526 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3527 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3528 else
3529 opts->x_ix86_cmodel = CM_32;
3531 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3533 error ("-masm=intel not supported in this configuration");
3534 opts->x_ix86_asm_dialect = ASM_ATT;
3536 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3537 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3538 sorry ("%i-bit mode not compiled in",
3539 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3541 for (i = 0; i < pta_size; i++)
3542 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3544 ix86_schedule = processor_alias_table[i].schedule;
3545 ix86_arch = processor_alias_table[i].processor;
3546 /* Default cpu tuning to the architecture. */
3547 ix86_tune = ix86_arch;
3549 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3550 && !(processor_alias_table[i].flags & PTA_64BIT))
3551 error ("CPU you selected does not support x86-64 "
3552 "instruction set");
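      /* Propagate each capability (PTA_*) of the selected -march CPU into the
         corresponding OPTION_MASK_ISA_* flag, unless that ISA flag was
         already set explicitly.  */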
3554 if (processor_alias_table[i].flags & PTA_MMX
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3557 if (processor_alias_table[i].flags & PTA_3DNOW
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3560 if (processor_alias_table[i].flags & PTA_3DNOW_A
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3563 if (processor_alias_table[i].flags & PTA_SSE
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3566 if (processor_alias_table[i].flags & PTA_SSE2
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3569 if (processor_alias_table[i].flags & PTA_SSE3
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3572 if (processor_alias_table[i].flags & PTA_SSSE3
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3575 if (processor_alias_table[i].flags & PTA_SSE4_1
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3578 if (processor_alias_table[i].flags & PTA_SSE4_2
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3581 if (processor_alias_table[i].flags & PTA_AVX
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3584 if (processor_alias_table[i].flags & PTA_AVX2
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3587 if (processor_alias_table[i].flags & PTA_FMA
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3590 if (processor_alias_table[i].flags & PTA_SSE4A
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3593 if (processor_alias_table[i].flags & PTA_FMA4
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3596 if (processor_alias_table[i].flags & PTA_XOP
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3599 if (processor_alias_table[i].flags & PTA_LWP
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3602 if (processor_alias_table[i].flags & PTA_ABM
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3605 if (processor_alias_table[i].flags & PTA_BMI
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3608 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3611 if (processor_alias_table[i].flags & PTA_TBM
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3614 if (processor_alias_table[i].flags & PTA_BMI2
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3617 if (processor_alias_table[i].flags & PTA_CX16
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3620 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3623 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3624 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3627 if (processor_alias_table[i].flags & PTA_MOVBE
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3630 if (processor_alias_table[i].flags & PTA_AES
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3633 if (processor_alias_table[i].flags & PTA_SHA
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3636 if (processor_alias_table[i].flags & PTA_PCLMUL
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3639 if (processor_alias_table[i].flags & PTA_FSGSBASE
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3642 if (processor_alias_table[i].flags & PTA_RDRND
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3645 if (processor_alias_table[i].flags & PTA_F16C
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3648 if (processor_alias_table[i].flags & PTA_RTM
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3651 if (processor_alias_table[i].flags & PTA_HLE
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3654 if (processor_alias_table[i].flags & PTA_PRFCHW
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3657 if (processor_alias_table[i].flags & PTA_RDSEED
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3660 if (processor_alias_table[i].flags & PTA_ADX
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3663 if (processor_alias_table[i].flags & PTA_FXSR
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3666 if (processor_alias_table[i].flags & PTA_XSAVE
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3669 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3672 if (processor_alias_table[i].flags & PTA_AVX512F
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3675 if (processor_alias_table[i].flags & PTA_AVX512ER
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3678 if (processor_alias_table[i].flags & PTA_AVX512PF
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3681 if (processor_alias_table[i].flags & PTA_AVX512CD
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3684 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3687 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3688 x86_prefetch_sse = true;
3690 break;
3693 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3694 error ("generic CPU can be used only for %stune=%s %s",
3695 prefix, suffix, sw);
3696 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3697 error ("intel CPU can be used only for %stune=%s %s",
3698 prefix, suffix, sw);
3699 else if (i == pta_size)
3700 error ("bad value (%s) for %sarch=%s %s",
3701 opts->x_ix86_arch_string, prefix, suffix, sw);
3703 ix86_arch_mask = 1u << ix86_arch;
3704 for (i = 0; i < X86_ARCH_LAST; ++i)
3705 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3707 for (i = 0; i < pta_size; i++)
3708 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3710 ix86_schedule = processor_alias_table[i].schedule;
3711 ix86_tune = processor_alias_table[i].processor;
3712 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3714 if (!(processor_alias_table[i].flags & PTA_64BIT))
3716 if (ix86_tune_defaulted)
3718 opts->x_ix86_tune_string = "x86-64";
3719 for (i = 0; i < pta_size; i++)
3720 if (! strcmp (opts->x_ix86_tune_string,
3721 processor_alias_table[i].name))
3722 break;
3723 ix86_schedule = processor_alias_table[i].schedule;
3724 ix86_tune = processor_alias_table[i].processor;
3726 else
3727 error ("CPU you selected does not support x86-64 "
3728 "instruction set");
3731 /* Intel CPUs have always interpreted SSE prefetch instructions as
3732 NOPs; so, we can enable SSE prefetch instructions even when
3733 -mtune (rather than -march) points us to a processor that has them.
3734 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3735 higher processors. */
3736 if (TARGET_CMOV
3737 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3738 x86_prefetch_sse = true;
3739 break;
3742 if (ix86_tune_specified && i == pta_size)
3743 error ("bad value (%s) for %stune=%s %s",
3744 opts->x_ix86_tune_string, prefix, suffix, sw);
3746 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3748 #ifndef USE_IX86_FRAME_POINTER
3749 #define USE_IX86_FRAME_POINTER 0
3750 #endif
3752 #ifndef USE_X86_64_FRAME_POINTER
3753 #define USE_X86_64_FRAME_POINTER 0
3754 #endif
3756 /* Set the default values for switches whose default depends on TARGET_64BIT
3757 in case they weren't overwritten by command line options. */
3758 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3760 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3761 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3762 if (opts->x_flag_asynchronous_unwind_tables
3763 && !opts_set->x_flag_unwind_tables
3764 && TARGET_64BIT_MS_ABI)
3765 opts->x_flag_unwind_tables = 1;
3766 if (opts->x_flag_asynchronous_unwind_tables == 2)
3767 opts->x_flag_unwind_tables
3768 = opts->x_flag_asynchronous_unwind_tables = 1;
3769 if (opts->x_flag_pcc_struct_return == 2)
3770 opts->x_flag_pcc_struct_return = 0;
3772 else
3774 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3775 opts->x_flag_omit_frame_pointer
3776 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3777 if (opts->x_flag_asynchronous_unwind_tables == 2)
3778 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3779 if (opts->x_flag_pcc_struct_return == 2)
3780 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3783 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3784 if (opts->x_optimize_size)
3785 ix86_cost = &ix86_size_cost;
3786 else
3787 ix86_cost = ix86_tune_cost;
3789 /* Arrange to set up i386_stack_locals for all functions. */
3790 init_machine_status = ix86_init_machine_status;
3792 /* Validate -mregparm= value. */
3793 if (opts_set->x_ix86_regparm)
3795 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3796 warning (0, "-mregparm is ignored in 64-bit mode");
3797 if (opts->x_ix86_regparm > REGPARM_MAX)
3799 error ("-mregparm=%d is not between 0 and %d",
3800 opts->x_ix86_regparm, REGPARM_MAX);
3801 opts->x_ix86_regparm = 0;
3804 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3805 opts->x_ix86_regparm = REGPARM_MAX;
3807 /* Default align_* from the processor table. */
3808 if (opts->x_align_loops == 0)
3810 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3811 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3813 if (opts->x_align_jumps == 0)
3815 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3816 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3818 if (opts->x_align_functions == 0)
3820 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3823 /* Provide default for -mbranch-cost= value. */
3824 if (!opts_set->x_ix86_branch_cost)
3825 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3827 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3829 opts->x_target_flags
3830 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3832 /* Enable by default the SSE and MMX builtins. Do allow the user to
3833 explicitly disable any of these. In particular, disabling SSE and
3834 MMX for kernel code is extremely useful. */
3835 if (!ix86_arch_specified)
3836 opts->x_ix86_isa_flags
3837 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3838 | TARGET_SUBTARGET64_ISA_DEFAULT)
3839 & ~opts->x_ix86_isa_flags_explicit);
3841 if (TARGET_RTD_P (opts->x_target_flags))
3842 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3844 else
3846 opts->x_target_flags
3847 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3849 if (!ix86_arch_specified)
3850 opts->x_ix86_isa_flags
3851 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3853 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3854 when the programmer takes care to keep the stack from being destroyed. */
3855 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3856 opts->x_target_flags |= MASK_NO_RED_ZONE;
3859 /* Keep nonleaf frame pointers. */
3860 if (opts->x_flag_omit_frame_pointer)
3861 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3862 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3863 opts->x_flag_omit_frame_pointer = 1;
3865 /* If we're doing fast math, we don't care about comparison order
3866 wrt NaNs. This lets us use a shorter comparison sequence. */
3867 if (opts->x_flag_finite_math_only)
3868 opts->x_target_flags &= ~MASK_IEEE_FP;
3870 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3871 since the insns won't need emulation. */
3872 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3873 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3875 /* Likewise, if the target doesn't have a 387, or we've specified
3876 software floating point, don't use 387 inline intrinsics. */
3877 if (!TARGET_80387_P (opts->x_target_flags))
3878 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3880 /* Turn on MMX builtins for -msse. */
3881 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3882 opts->x_ix86_isa_flags
3883 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3885 /* Enable SSE prefetch. */
3886 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3887 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3888 x86_prefetch_sse = true;
3890 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3891 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3892 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3893 opts->x_ix86_isa_flags
3894 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3896 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3897 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3898 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3899 opts->x_ix86_isa_flags
3900 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3902 /* Enable lzcnt instruction for -mabm. */
3903 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3904 opts->x_ix86_isa_flags
3905 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3907 /* Validate -mpreferred-stack-boundary= value or default it to
3908 PREFERRED_STACK_BOUNDARY_DEFAULT. */
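   /* The option argument is the log2 of the boundary in bytes; e.g. an
      argument of 4 yields (1 << 4) * BITS_PER_UNIT = 128 bits (with
      BITS_PER_UNIT == 8), i.e. a 16-byte boundary.  */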
3909 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3910 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3912 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3913 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3914 int max = (TARGET_SEH ? 4 : 12);
3916 if (opts->x_ix86_preferred_stack_boundary_arg < min
3917 || opts->x_ix86_preferred_stack_boundary_arg > max)
3919 if (min == max)
3920 error ("-mpreferred-stack-boundary is not supported "
3921 "for this target");
3922 else
3923 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3924 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3926 else
3927 ix86_preferred_stack_boundary
3928 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3931 /* Set the default value for -mstackrealign. */
3932 if (opts->x_ix86_force_align_arg_pointer == -1)
3933 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3935 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3937 /* Validate -mincoming-stack-boundary= value or default it to
3938 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3939 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3940 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3942 if (opts->x_ix86_incoming_stack_boundary_arg
3943 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3944 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3945 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3946 opts->x_ix86_incoming_stack_boundary_arg,
3947 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3948 else
3950 ix86_user_incoming_stack_boundary
3951 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3952 ix86_incoming_stack_boundary
3953 = ix86_user_incoming_stack_boundary;
3957 /* Accept -msseregparm only if at least SSE support is enabled. */
3958 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3959 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3960 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3962 if (opts_set->x_ix86_fpmath)
3964 if (opts->x_ix86_fpmath & FPMATH_SSE)
3966 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3968 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3969 opts->x_ix86_fpmath = FPMATH_387;
3971 else if ((opts->x_ix86_fpmath & FPMATH_387)
3972 && !TARGET_80387_P (opts->x_target_flags))
3974 warning (0, "387 instruction set disabled, using SSE arithmetics");
3975 opts->x_ix86_fpmath = FPMATH_SSE;
3979 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3980 fpmath=387. The latter is however the default on many targets, since
3981 the extra 80-bit precision of temporaries is considered to be part of
3982 the ABI. Overwrite the default at least for -ffast-math.
3983 TODO: -mfpmath=both seems to produce code that performs the same, with
3984 slightly smaller binaries. It is however not clear if register
3985 allocation is ready for this setting.
3986 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3987 codegen. We may switch to 387 with -ffast-math for size optimized
3988 functions. */
3989 else if (fast_math_flags_set_p (&global_options)
3990 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3991 opts->x_ix86_fpmath = FPMATH_SSE;
3992 else
3993 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3995 /* If the i387 is disabled, then do not return values in it. */
3996 if (!TARGET_80387_P (opts->x_target_flags))
3997 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3999 /* Use external vectorized library in vectorizing intrinsics. */
4000 if (opts_set->x_ix86_veclibabi_type)
4001 switch (opts->x_ix86_veclibabi_type)
4003 case ix86_veclibabi_type_svml:
4004 ix86_veclib_handler = ix86_veclibabi_svml;
4005 break;
4007 case ix86_veclibabi_type_acml:
4008 ix86_veclib_handler = ix86_veclibabi_acml;
4009 break;
4011 default:
4012 gcc_unreachable ();
4015 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4016 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4017 && !opts->x_optimize_size)
4018 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4020 /* If stack probes are required, the space used for large function
4021 arguments on the stack must also be probed, so enable
4022 -maccumulate-outgoing-args so this happens in the prologue. */
4023 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4024 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4026 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4027 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4028 "for correctness", prefix, suffix);
4029 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4032 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4034 char *p;
4035 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4036 p = strchr (internal_label_prefix, 'X');
4037 internal_label_prefix_len = p - internal_label_prefix;
4038 *p = '\0';
4041 /* When the scheduling description is not available, disable the scheduler
4042 pass so it won't slow down the compilation and make x87 code slower. */
4043 if (!TARGET_SCHEDULE)
4044 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4046 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4047 ix86_tune_cost->simultaneous_prefetches,
4048 opts->x_param_values,
4049 opts_set->x_param_values);
4050 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4051 ix86_tune_cost->prefetch_block,
4052 opts->x_param_values,
4053 opts_set->x_param_values);
4054 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4055 ix86_tune_cost->l1_cache_size,
4056 opts->x_param_values,
4057 opts_set->x_param_values);
4058 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4059 ix86_tune_cost->l2_cache_size,
4060 opts->x_param_values,
4061 opts_set->x_param_values);
4063 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4064 if (opts->x_flag_prefetch_loop_arrays < 0
4065 && HAVE_prefetch
4066 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4067 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4068 opts->x_flag_prefetch_loop_arrays = 1;
4070 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4071 can be optimized to ap = __builtin_next_arg (0). */
4072 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4073 targetm.expand_builtin_va_start = NULL;
4075 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4077 ix86_gen_leave = gen_leave_rex64;
4078 if (Pmode == DImode)
4080 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4081 ix86_gen_tls_local_dynamic_base_64
4082 = gen_tls_local_dynamic_base_64_di;
4084 else
4086 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4087 ix86_gen_tls_local_dynamic_base_64
4088 = gen_tls_local_dynamic_base_64_si;
4091 else
4092 ix86_gen_leave = gen_leave;
4094 if (Pmode == DImode)
4096 ix86_gen_add3 = gen_adddi3;
4097 ix86_gen_sub3 = gen_subdi3;
4098 ix86_gen_sub3_carry = gen_subdi3_carry;
4099 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4100 ix86_gen_andsp = gen_anddi3;
4101 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4102 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4103 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4104 ix86_gen_monitor = gen_sse3_monitor_di;
4106 else
4108 ix86_gen_add3 = gen_addsi3;
4109 ix86_gen_sub3 = gen_subsi3;
4110 ix86_gen_sub3_carry = gen_subsi3_carry;
4111 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4112 ix86_gen_andsp = gen_andsi3;
4113 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4114 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4115 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4116 ix86_gen_monitor = gen_sse3_monitor_si;
4119 #ifdef USE_IX86_CLD
4120 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4121 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4122 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4123 #endif
4125 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4127 if (opts->x_flag_fentry > 0)
4128 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4129 "with -fpic");
4130 opts->x_flag_fentry = 0;
4132 else if (TARGET_SEH)
4134 if (opts->x_flag_fentry == 0)
4135 sorry ("-mno-fentry isn%'t compatible with SEH");
4136 opts->x_flag_fentry = 1;
4138 else if (opts->x_flag_fentry < 0)
4140 #if defined(PROFILE_BEFORE_PROLOGUE)
4141 opts->x_flag_fentry = 1;
4142 #else
4143 opts->x_flag_fentry = 0;
4144 #endif
4147 /* When not optimizing for size, enable vzeroupper optimization for
4148 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4149 AVX unaligned loads/stores. */
4150 if (!opts->x_optimize_size)
4152 if (flag_expensive_optimizations
4153 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4154 opts->x_target_flags |= MASK_VZEROUPPER;
4155 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4156 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4157 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4158 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4159 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4160 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4161 /* Enable 128-bit AVX instruction generation
4162 for the auto-vectorizer. */
4163 if (TARGET_AVX128_OPTIMAL
4164 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4165 opts->x_target_flags |= MASK_PREFER_AVX128;
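  /* -mrecip= takes a comma-separated list of the entries in recip_options
     above; a leading '!' clears the corresponding mask bits instead of
     setting them, and "default" stands for all of them, e.g. -mrecip=all,!sqrt.  */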
4168 if (opts->x_ix86_recip_name)
4170 char *p = ASTRDUP (opts->x_ix86_recip_name);
4171 char *q;
4172 unsigned int mask, i;
4173 bool invert;
4175 while ((q = strtok (p, ",")) != NULL)
4177 p = NULL;
4178 if (*q == '!')
4180 invert = true;
4181 q++;
4183 else
4184 invert = false;
4186 if (!strcmp (q, "default"))
4187 mask = RECIP_MASK_ALL;
4188 else
4190 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4191 if (!strcmp (q, recip_options[i].string))
4193 mask = recip_options[i].mask;
4194 break;
4197 if (i == ARRAY_SIZE (recip_options))
4199 error ("unknown option for -mrecip=%s", q);
4200 invert = false;
4201 mask = RECIP_MASK_NONE;
4205 opts->x_recip_mask_explicit |= mask;
4206 if (invert)
4207 opts->x_recip_mask &= ~mask;
4208 else
4209 opts->x_recip_mask |= mask;
4213 if (TARGET_RECIP_P (opts->x_target_flags))
4214 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4215 else if (opts_set->x_target_flags & MASK_RECIP)
4216 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4218 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4219 for 64-bit Bionic. */
4220 if (TARGET_HAS_BIONIC
4221 && !(opts_set->x_target_flags
4222 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4223 opts->x_target_flags |= (TARGET_64BIT
4224 ? MASK_LONG_DOUBLE_128
4225 : MASK_LONG_DOUBLE_64);
4227 /* Only one of them can be active. */
4228 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4229 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4231 /* Save the initial options in case the user does function specific
4232 options. */
4233 if (main_args_p)
4234 target_option_default_node = target_option_current_node
4235 = build_target_option_node (opts);
4237 /* Handle stack protector */
4238 if (!opts_set->x_ix86_stack_protector_guard)
4239 opts->x_ix86_stack_protector_guard
4240 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4242 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4243 if (opts->x_ix86_tune_memcpy_strategy)
4245 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4246 ix86_parse_stringop_strategy_string (str, false);
4247 free (str);
4250 if (opts->x_ix86_tune_memset_strategy)
4252 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4253 ix86_parse_stringop_strategy_string (str, true);
4254 free (str);
4258 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4260 static void
4261 ix86_option_override (void)
4263 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4264 static struct register_pass_info insert_vzeroupper_info
4265 = { pass_insert_vzeroupper, "reload",
4266 1, PASS_POS_INSERT_AFTER
4269 ix86_option_override_internal (true, &global_options, &global_options_set);
4272 /* This needs to be done at start up. It's convenient to do it here. */
4273 register_pass (&insert_vzeroupper_info);
4276 /* Update register usage after having seen the compiler flags. */
4278 static void
4279 ix86_conditional_register_usage (void)
4281 int i, c_mask;
4282 unsigned int j;
4284 /* The PIC register, if it exists, is fixed. */
4285 j = PIC_OFFSET_TABLE_REGNUM;
4286 if (j != INVALID_REGNUM)
4287 fixed_regs[j] = call_used_regs[j] = 1;
4289 /* For 32-bit targets, squash the REX registers. */
4290 if (! TARGET_64BIT)
4292 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4293 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4294 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4295 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4296 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4297 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4300 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4301 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4302 : TARGET_64BIT ? (1 << 2)
4303 : (1 << 1));
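  /* Entries greater than 1 in the CALL_USED_REGISTERS initializer encode a
     per-ABI bitmask; c_mask selects the bit for the current ABI (1 << 1 for
     32-bit, 1 << 2 for 64-bit SysV, 1 << 3 for the 64-bit MS ABI).  */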
4305 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4307 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4309 /* Set/reset conditionally defined registers from
4310 CALL_USED_REGISTERS initializer. */
4311 if (call_used_regs[i] > 1)
4312 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4314 /* Calculate registers of CLOBBERED_REGS register set
4315 as call used registers from GENERAL_REGS register set. */
4316 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4317 && call_used_regs[i])
4318 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4321 /* If MMX is disabled, squash the registers. */
4322 if (! TARGET_MMX)
4323 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4324 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4325 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4327 /* If SSE is disabled, squash the registers. */
4328 if (! TARGET_SSE)
4329 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4330 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4331 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4333 /* If the FPU is disabled, squash the registers. */
4334 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4337 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4339 /* If AVX512F is disabled, squash the registers. */
4340 if (! TARGET_AVX512F)
4342 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4343 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4345 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4346 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4351 /* Save the current options */
4353 static void
4354 ix86_function_specific_save (struct cl_target_option *ptr,
4355 struct gcc_options *opts)
4357 ptr->arch = ix86_arch;
4358 ptr->schedule = ix86_schedule;
4359 ptr->tune = ix86_tune;
4360 ptr->branch_cost = ix86_branch_cost;
4361 ptr->tune_defaulted = ix86_tune_defaulted;
4362 ptr->arch_specified = ix86_arch_specified;
4363 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4364 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4365 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4366 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4367 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4368 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4369 ptr->x_ix86_abi = opts->x_ix86_abi;
4370 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4371 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4372 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4373 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4374 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4375 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4376 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4377 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4378 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4379 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4380 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4381 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4382 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4383 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4384 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4385 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4386 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4387 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4388 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4389 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4391 /* The fields are char but the variables are not; make sure the
4392 values fit in the fields. */
4393 gcc_assert (ptr->arch == ix86_arch);
4394 gcc_assert (ptr->schedule == ix86_schedule);
4395 gcc_assert (ptr->tune == ix86_tune);
4396 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4399 /* Restore the current options */
4401 static void
4402 ix86_function_specific_restore (struct gcc_options *opts,
4403 struct cl_target_option *ptr)
4405 enum processor_type old_tune = ix86_tune;
4406 enum processor_type old_arch = ix86_arch;
4407 unsigned int ix86_arch_mask;
4408 int i;
4410 /* We don't change -fPIC. */
4411 opts->x_flag_pic = flag_pic;
4413 ix86_arch = (enum processor_type) ptr->arch;
4414 ix86_schedule = (enum attr_cpu) ptr->schedule;
4415 ix86_tune = (enum processor_type) ptr->tune;
4416 opts->x_ix86_branch_cost = ptr->branch_cost;
4417 ix86_tune_defaulted = ptr->tune_defaulted;
4418 ix86_arch_specified = ptr->arch_specified;
4419 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4420 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4421 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4422 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4423 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4424 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4425 opts->x_ix86_abi = ptr->x_ix86_abi;
4426 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4427 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4428 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4429 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4430 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4431 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4432 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4433 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4434 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4435 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4436 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4437 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4438 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4439 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4440 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4441 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4442 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4443 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4444 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4445 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4447 /* Recreate the arch feature tests if the arch changed */
4448 if (old_arch != ix86_arch)
4450 ix86_arch_mask = 1u << ix86_arch;
4451 for (i = 0; i < X86_ARCH_LAST; ++i)
4452 ix86_arch_features[i]
4453 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4456 /* Recreate the tune optimization tests */
4457 if (old_tune != ix86_tune)
4458 set_ix86_tune_features (ix86_tune, false);
4461 /* Print the current options */
4463 static void
4464 ix86_function_specific_print (FILE *file, int indent,
4465 struct cl_target_option *ptr)
4467 char *target_string
4468 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4469 NULL, NULL, ptr->x_ix86_fpmath, false);
4471 gcc_assert (ptr->arch < PROCESSOR_max);
4472 fprintf (file, "%*sarch = %d (%s)\n",
4473 indent, "",
4474 ptr->arch, processor_target_table[ptr->arch].name);
4476 gcc_assert (ptr->tune < PROCESSOR_max);
4477 fprintf (file, "%*stune = %d (%s)\n",
4478 indent, "",
4479 ptr->tune, processor_target_table[ptr->tune].name);
4481 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4483 if (target_string)
4485 fprintf (file, "%*s%s\n", indent, "", target_string);
4486 free (target_string);
4491 /* Inner function to process the attribute((target(...))): take an argument
4492 and set the current options from it. If we have a list, recursively go
4493 over the list. */
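/* For example (illustrative), __attribute__((target("arch=core2,sse4.2")))
   ends up here, and each option string is matched against the attrs[] table
   below.  */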
4495 static bool
4496 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4497 struct gcc_options *opts,
4498 struct gcc_options *opts_set,
4499 struct gcc_options *enum_opts_set)
4501 char *next_optstr;
4502 bool ret = true;
4504 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4505 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4506 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4507 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4508 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4510 enum ix86_opt_type
4512 ix86_opt_unknown,
4513 ix86_opt_yes,
4514 ix86_opt_no,
4515 ix86_opt_str,
4516 ix86_opt_enum,
4517 ix86_opt_isa
4520 static const struct
4522 const char *string;
4523 size_t len;
4524 enum ix86_opt_type type;
4525 int opt;
4526 int mask;
4527 } attrs[] = {
4528 /* isa options */
4529 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4530 IX86_ATTR_ISA ("abm", OPT_mabm),
4531 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4532 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4533 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4534 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4535 IX86_ATTR_ISA ("aes", OPT_maes),
4536 IX86_ATTR_ISA ("sha", OPT_msha),
4537 IX86_ATTR_ISA ("avx", OPT_mavx),
4538 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4539 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4540 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4541 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4542 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4543 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4544 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4545 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4546 IX86_ATTR_ISA ("sse", OPT_msse),
4547 IX86_ATTR_ISA ("sse2", OPT_msse2),
4548 IX86_ATTR_ISA ("sse3", OPT_msse3),
4549 IX86_ATTR_ISA ("sse4", OPT_msse4),
4550 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4551 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4552 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4553 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4554 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4555 IX86_ATTR_ISA ("fma", OPT_mfma),
4556 IX86_ATTR_ISA ("xop", OPT_mxop),
4557 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4558 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4559 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4560 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4561 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4562 IX86_ATTR_ISA ("hle", OPT_mhle),
4563 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4564 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4565 IX86_ATTR_ISA ("adx", OPT_madx),
4566 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4567 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4568 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4569 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4571 /* enum options */
4572 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4574 /* string options */
4575 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4576 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4578 /* flag options */
4579 IX86_ATTR_YES ("cld",
4580 OPT_mcld,
4581 MASK_CLD),
4583 IX86_ATTR_NO ("fancy-math-387",
4584 OPT_mfancy_math_387,
4585 MASK_NO_FANCY_MATH_387),
4587 IX86_ATTR_YES ("ieee-fp",
4588 OPT_mieee_fp,
4589 MASK_IEEE_FP),
4591 IX86_ATTR_YES ("inline-all-stringops",
4592 OPT_minline_all_stringops,
4593 MASK_INLINE_ALL_STRINGOPS),
4595 IX86_ATTR_YES ("inline-stringops-dynamically",
4596 OPT_minline_stringops_dynamically,
4597 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4599 IX86_ATTR_NO ("align-stringops",
4600 OPT_mno_align_stringops,
4601 MASK_NO_ALIGN_STRINGOPS),
4603 IX86_ATTR_YES ("recip",
4604 OPT_mrecip,
4605 MASK_RECIP),
4609 /* If this is a list, recurse to get the options. */
4610 if (TREE_CODE (args) == TREE_LIST)
4612 bool ret = true;
4614 for (; args; args = TREE_CHAIN (args))
4615 if (TREE_VALUE (args)
4616 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4617 p_strings, opts, opts_set,
4618 enum_opts_set))
4619 ret = false;
4621 return ret;
4624 else if (TREE_CODE (args) != STRING_CST)
4626 error ("attribute %<target%> argument not a string");
4627 return false;
4630 /* Handle multiple arguments separated by commas. */
4631 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4633 while (next_optstr && *next_optstr != '\0')
4635 char *p = next_optstr;
4636 char *orig_p = p;
4637 char *comma = strchr (next_optstr, ',');
4638 const char *opt_string;
4639 size_t len, opt_len;
4640 int opt;
4641 bool opt_set_p;
4642 char ch;
4643 unsigned i;
4644 enum ix86_opt_type type = ix86_opt_unknown;
4645 int mask = 0;
4647 if (comma)
4649 *comma = '\0';
4650 len = comma - next_optstr;
4651 next_optstr = comma + 1;
4653 else
4655 len = strlen (p);
4656 next_optstr = NULL;
4659 /* Recognize no-xxx. */
4660 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4662 opt_set_p = false;
4663 p += 3;
4664 len -= 3;
4666 else
4667 opt_set_p = true;
4669 /* Find the option. */
4670 ch = *p;
4671 opt = N_OPTS;
4672 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4674 type = attrs[i].type;
4675 opt_len = attrs[i].len;
4676 if (ch == attrs[i].string[0]
4677 && ((type != ix86_opt_str && type != ix86_opt_enum)
4678 ? len == opt_len
4679 : len > opt_len)
4680 && memcmp (p, attrs[i].string, opt_len) == 0)
4682 opt = attrs[i].opt;
4683 mask = attrs[i].mask;
4684 opt_string = attrs[i].string;
4685 break;
4689 /* Process the option. */
4690 if (opt == N_OPTS)
4692 error ("attribute(target(\"%s\")) is unknown", orig_p);
4693 ret = false;
4696 else if (type == ix86_opt_isa)
4698 struct cl_decoded_option decoded;
4700 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4701 ix86_handle_option (opts, opts_set,
4702 &decoded, input_location);
4705 else if (type == ix86_opt_yes || type == ix86_opt_no)
4707 if (type == ix86_opt_no)
4708 opt_set_p = !opt_set_p;
4710 if (opt_set_p)
4711 opts->x_target_flags |= mask;
4712 else
4713 opts->x_target_flags &= ~mask;
4716 else if (type == ix86_opt_str)
4718 if (p_strings[opt])
4720 error ("option(\"%s\") was already specified", opt_string);
4721 ret = false;
4723 else
4724 p_strings[opt] = xstrdup (p + opt_len);
4727 else if (type == ix86_opt_enum)
4729 bool arg_ok;
4730 int value;
4732 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4733 if (arg_ok)
4734 set_option (opts, enum_opts_set, opt, value,
4735 p + opt_len, DK_UNSPECIFIED, input_location,
4736 global_dc);
4737 else
4739 error ("attribute(target(\"%s\")) is unknown", orig_p);
4740 ret = false;
4744 else
4745 gcc_unreachable ();
4748 return ret;
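/* Illustrative example (a sketch, not part of the original sources) of the
   attribute syntax the parser above accepts: a comma-separated list in which
   a "no-" prefix clears an ISA flag, "arch="/"tune=" take strings, and
   "fpmath=" takes an enum value:

     __attribute__((target ("avx2,no-sse4a,arch=core-avx2,fpmath=sse")))
     void hot_loop (float *dst, const float *src, int n);

   Each element is looked up in the attrs[] table; anything that does not
   match reaches the "attribute(target(...)) is unknown" diagnostic.  */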
4751 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4753 tree
4754 ix86_valid_target_attribute_tree (tree args,
4755 struct gcc_options *opts,
4756 struct gcc_options *opts_set)
4758 const char *orig_arch_string = opts->x_ix86_arch_string;
4759 const char *orig_tune_string = opts->x_ix86_tune_string;
4760 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4761 int orig_tune_defaulted = ix86_tune_defaulted;
4762 int orig_arch_specified = ix86_arch_specified;
4763 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4764 tree t = NULL_TREE;
4765 int i;
4766 struct cl_target_option *def
4767 = TREE_TARGET_OPTION (target_option_default_node);
4768 struct gcc_options enum_opts_set;
4770 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4772 /* Process each of the options on the chain. */
4773 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4774 opts_set, &enum_opts_set))
4775 return error_mark_node;
4777 /* If the changed options are different from the default, rerun
4778 ix86_option_override_internal, and then save the options away.
4779 The string options are attribute options, and will be undone
4780 when we copy the save structure. */
4781 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4782 || opts->x_target_flags != def->x_target_flags
4783 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4784 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4785 || enum_opts_set.x_ix86_fpmath)
4787 /* If we are using the default tune= or arch=, undo the string assigned,
4788 and use the default. */
4789 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4790 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4791 else if (!orig_arch_specified)
4792 opts->x_ix86_arch_string = NULL;
4794 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4795 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4796 else if (orig_tune_defaulted)
4797 opts->x_ix86_tune_string = NULL;
4799 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4800 if (enum_opts_set.x_ix86_fpmath)
4801 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4802 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4803 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4805 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4806 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4809 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4810 ix86_option_override_internal (false, opts, opts_set);
4812 /* Add any builtin functions with the new isa if any. */
4813 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4815 /* Save the current options unless we are validating options for
4816 #pragma. */
4817 t = build_target_option_node (opts);
4819 opts->x_ix86_arch_string = orig_arch_string;
4820 opts->x_ix86_tune_string = orig_tune_string;
4821 opts_set->x_ix86_fpmath = orig_fpmath_set;
4823 /* Free up memory allocated to hold the strings */
4824 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4825 free (option_strings[i]);
4828 return t;
4831 /* Hook to validate attribute((target("string"))). */
4833 static bool
4834 ix86_valid_target_attribute_p (tree fndecl,
4835 tree ARG_UNUSED (name),
4836 tree args,
4837 int ARG_UNUSED (flags))
4839 struct gcc_options func_options;
4840 tree new_target, new_optimize;
4841 bool ret = true;
4843 /* attribute((target("default"))) does nothing, beyond
4844 affecting multi-versioning. */
4845 if (TREE_VALUE (args)
4846 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4847 && TREE_CHAIN (args) == NULL_TREE
4848 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4849 return true;
4851 tree old_optimize = build_optimization_node (&global_options);
4853 /* Get the optimization options of the current function. */
4854 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4856 if (!func_optimize)
4857 func_optimize = old_optimize;
4859 /* Init func_options. */
4860 memset (&func_options, 0, sizeof (func_options));
4861 init_options_struct (&func_options, NULL);
4862 lang_hooks.init_options_struct (&func_options);
4864 cl_optimization_restore (&func_options,
4865 TREE_OPTIMIZATION (func_optimize));
4867 /* Initialize func_options to the default before its target options can
4868 be set. */
4869 cl_target_option_restore (&func_options,
4870 TREE_TARGET_OPTION (target_option_default_node));
4872 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4873 &global_options_set);
4875 new_optimize = build_optimization_node (&func_options);
4877 if (new_target == error_mark_node)
4878 ret = false;
4880 else if (fndecl && new_target)
4882 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4884 if (old_optimize != new_optimize)
4885 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4888 return ret;
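/* Illustrative example (a sketch, not part of the original sources) of why
   "default" is special-cased above: with C++ function multi-versioning the
   default variant carries no option changes of its own, e.g.

     __attribute__((target ("default"))) int dispatch (void) { return 0; }
     __attribute__((target ("avx2")))    int dispatch (void) { return 1; }

   Only the non-default variant is run through
   ix86_valid_target_attribute_tree to build a TARGET_OPTION_NODE.  */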
4892 /* Hook to determine if one function can safely inline another. */
4894 static bool
4895 ix86_can_inline_p (tree caller, tree callee)
4897 bool ret = false;
4898 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4899 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4901 /* If callee has no option attributes, then it is ok to inline. */
4902 if (!callee_tree)
4903 ret = true;
4905 /* If caller has no option attributes, but callee does then it is not ok to
4906 inline. */
4907 else if (!caller_tree)
4908 ret = false;
4910 else
4912 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4913 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4915 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4916 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4917 function. */
4918 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4919 != callee_opts->x_ix86_isa_flags)
4920 ret = false;
4922 /* See if we have the same non-isa options. */
4923 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4924 ret = false;
4926 /* See if arch, tune, etc. are the same. */
4927 else if (caller_opts->arch != callee_opts->arch)
4928 ret = false;
4930 else if (caller_opts->tune != callee_opts->tune)
4931 ret = false;
4933 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4934 ret = false;
4936 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4937 ret = false;
4939 else
4940 ret = true;
4943 return ret;
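/* Illustrative example (a sketch, not part of the original sources) of the
   ISA-subset rule checked above -- other options being equal, the callee's
   ISA flags must be a subset of the caller's:

     __attribute__((target ("sse2")))   static int callee (int x) { return x + 1; }
     __attribute__((target ("sse4.2"))) int caller (int x) { return callee (x); }

   caller (SSE4.2) may inline callee (SSE2), but an SSE2 caller could not
   inline an SSE4.2 callee, since it cannot guarantee that SSE4.2
   instructions are available at the call site.  */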
4947 /* Remember the last target of ix86_set_current_function. */
4948 static GTY(()) tree ix86_previous_fndecl;
4950 /* Invalidate ix86_previous_fndecl cache. */
4951 void
4952 ix86_reset_previous_fndecl (void)
4954 ix86_previous_fndecl = NULL_TREE;
4957 /* Establish appropriate back-end context for processing the function
4958 FNDECL. The argument might be NULL to indicate processing at top
4959 level, outside of any function scope. */
4960 static void
4961 ix86_set_current_function (tree fndecl)
4963 /* Only change the context if the function changes. This hook is called
4964 several times in the course of compiling a function, and we don't want to
4965 slow things down too much or call target_reinit when it isn't safe. */
4966 if (fndecl && fndecl != ix86_previous_fndecl)
4968 tree old_tree = (ix86_previous_fndecl
4969 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4970 : NULL_TREE);
4972 tree new_tree = (fndecl
4973 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4974 : NULL_TREE);
4976 ix86_previous_fndecl = fndecl;
4977 if (old_tree == new_tree)
4980 else if (new_tree)
4982 cl_target_option_restore (&global_options,
4983 TREE_TARGET_OPTION (new_tree));
4984 if (TREE_TARGET_GLOBALS (new_tree))
4985 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4986 else
4987 TREE_TARGET_GLOBALS (new_tree)
4988 = save_target_globals_default_opts ();
4991 else if (old_tree)
4993 new_tree = target_option_current_node;
4994 cl_target_option_restore (&global_options,
4995 TREE_TARGET_OPTION (new_tree));
4996 if (TREE_TARGET_GLOBALS (new_tree))
4997 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4998 else if (new_tree == target_option_default_node)
4999 restore_target_globals (&default_target_globals);
5000 else
5001 TREE_TARGET_GLOBALS (new_tree)
5002 = save_target_globals_default_opts ();
5008 /* Return true if this goes in large data/bss. */
5010 static bool
5011 ix86_in_large_data_p (tree exp)
5013 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5014 return false;
5016 /* Functions are never large data. */
5017 if (TREE_CODE (exp) == FUNCTION_DECL)
5018 return false;
5020 /* Automatic variables are never large data. */
5021 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5022 return false;
5024 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5026 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5027 if (strcmp (section, ".ldata") == 0
5028 || strcmp (section, ".lbss") == 0)
5029 return true;
5030 return false;
5032 else
5034 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5036 /* If this is an incomplete type with size 0, then we can't put it
5037 in data because it might be too big when completed. */
5038 if (!size || size > ix86_section_threshold)
5039 return true;
5042 return false;
5045 /* Switch to the appropriate section for output of DECL.
5046 DECL is either a `VAR_DECL' node or a constant of some sort.
5047 RELOC indicates whether forming the initial value of DECL requires
5048 link-time relocations. */
5050 ATTRIBUTE_UNUSED static section *
5051 x86_64_elf_select_section (tree decl, int reloc,
5052 unsigned HOST_WIDE_INT align)
5054 if (ix86_in_large_data_p (decl))
5056 const char *sname = NULL;
5057 unsigned int flags = SECTION_WRITE;
5058 switch (categorize_decl_for_section (decl, reloc))
5060 case SECCAT_DATA:
5061 sname = ".ldata";
5062 break;
5063 case SECCAT_DATA_REL:
5064 sname = ".ldata.rel";
5065 break;
5066 case SECCAT_DATA_REL_LOCAL:
5067 sname = ".ldata.rel.local";
5068 break;
5069 case SECCAT_DATA_REL_RO:
5070 sname = ".ldata.rel.ro";
5071 break;
5072 case SECCAT_DATA_REL_RO_LOCAL:
5073 sname = ".ldata.rel.ro.local";
5074 break;
5075 case SECCAT_BSS:
5076 sname = ".lbss";
5077 flags |= SECTION_BSS;
5078 break;
5079 case SECCAT_RODATA:
5080 case SECCAT_RODATA_MERGE_STR:
5081 case SECCAT_RODATA_MERGE_STR_INIT:
5082 case SECCAT_RODATA_MERGE_CONST:
5083 sname = ".lrodata";
5084 flags = 0;
5085 break;
5086 case SECCAT_SRODATA:
5087 case SECCAT_SDATA:
5088 case SECCAT_SBSS:
5089 gcc_unreachable ();
5090 case SECCAT_TEXT:
5091 case SECCAT_TDATA:
5092 case SECCAT_TBSS:
5093 /* We don't split these for the medium model. Place them into
5094 default sections and hope for the best. */
5095 break;
5097 if (sname)
5099 /* We might get called with string constants, but get_named_section
5100 doesn't like them as they are not DECLs. Also, we need to set
5101 flags in that case. */
5102 if (!DECL_P (decl))
5103 return get_section (sname, flags, NULL);
5104 return get_named_section (decl, sname, reloc);
5107 return default_elf_select_section (decl, reloc, align);
5110 /* Select a set of attributes for section NAME based on the properties
5111 of DECL and whether or not RELOC indicates that DECL's initializer
5112 might contain runtime relocations. */
5114 static unsigned int ATTRIBUTE_UNUSED
5115 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5117 unsigned int flags = default_section_type_flags (decl, name, reloc);
5119 if (decl == NULL_TREE
5120 && (strcmp (name, ".ldata.rel.ro") == 0
5121 || strcmp (name, ".ldata.rel.ro.local") == 0))
5122 flags |= SECTION_RELRO;
5124 if (strcmp (name, ".lbss") == 0
5125 || strncmp (name, ".lbss.", 5) == 0
5126 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5127 flags |= SECTION_BSS;
5129 return flags;
5132 /* Build up a unique section name, expressed as a
5133 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5134 RELOC indicates whether the initial value of EXP requires
5135 link-time relocations. */
5137 static void ATTRIBUTE_UNUSED
5138 x86_64_elf_unique_section (tree decl, int reloc)
5140 if (ix86_in_large_data_p (decl))
5142 const char *prefix = NULL;
5143 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5144 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5146 switch (categorize_decl_for_section (decl, reloc))
5148 case SECCAT_DATA:
5149 case SECCAT_DATA_REL:
5150 case SECCAT_DATA_REL_LOCAL:
5151 case SECCAT_DATA_REL_RO:
5152 case SECCAT_DATA_REL_RO_LOCAL:
5153 prefix = one_only ? ".ld" : ".ldata";
5154 break;
5155 case SECCAT_BSS:
5156 prefix = one_only ? ".lb" : ".lbss";
5157 break;
5158 case SECCAT_RODATA:
5159 case SECCAT_RODATA_MERGE_STR:
5160 case SECCAT_RODATA_MERGE_STR_INIT:
5161 case SECCAT_RODATA_MERGE_CONST:
5162 prefix = one_only ? ".lr" : ".lrodata";
5163 break;
5164 case SECCAT_SRODATA:
5165 case SECCAT_SDATA:
5166 case SECCAT_SBSS:
5167 gcc_unreachable ();
5168 case SECCAT_TEXT:
5169 case SECCAT_TDATA:
5170 case SECCAT_TBSS:
5171 /* We don't split these for the medium model. Place them into
5172 default sections and hope for the best. */
5173 break;
5175 if (prefix)
5177 const char *name, *linkonce;
5178 char *string;
5180 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5181 name = targetm.strip_name_encoding (name);
5183 /* If we're using one_only, then there needs to be a .gnu.linkonce
5184 prefix to the section name. */
5185 linkonce = one_only ? ".gnu.linkonce" : "";
5187 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5189 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5190 return;
5193 default_unique_section (decl, reloc);
5196 #ifdef COMMON_ASM_OP
5197 /* This says how to output assembler code to declare an
5198 uninitialized external linkage data object.
5200 For medium model x86-64 we need to use .largecomm opcode for
5201 large objects. */
5202 void
5203 x86_elf_aligned_common (FILE *file,
5204 const char *name, unsigned HOST_WIDE_INT size,
5205 int align)
5207 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5208 && size > (unsigned int)ix86_section_threshold)
5209 fputs ("\t.largecomm\t", file);
5210 else
5211 fputs (COMMON_ASM_OP, file);
5212 assemble_name (file, name);
5213 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5214 size, align / BITS_PER_UNIT);
5216 #endif
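/* Illustrative output of x86_elf_aligned_common above (assumed, for a
   hypothetical 1 MiB object under -mcmodel=medium with the default 64K
   section threshold):

     .largecomm  big_array,1048576,32

   Objects at or below the threshold fall back to the ordinary
   COMMON_ASM_OP (".comm") directive.  */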
5218 /* Utility function for targets to use in implementing
5219 ASM_OUTPUT_ALIGNED_BSS. */
5221 void
5222 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5223 const char *name, unsigned HOST_WIDE_INT size,
5224 int align)
5226 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5227 && size > (unsigned int)ix86_section_threshold)
5228 switch_to_section (get_named_section (decl, ".lbss", 0));
5229 else
5230 switch_to_section (bss_section);
5231 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5232 #ifdef ASM_DECLARE_OBJECT_NAME
5233 last_assemble_variable_decl = decl;
5234 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5235 #else
5236 /* Standard thing is just output label for the object. */
5237 ASM_OUTPUT_LABEL (file, name);
5238 #endif /* ASM_DECLARE_OBJECT_NAME */
5239 ASM_OUTPUT_SKIP (file, size ? size : 1);
5242 /* Decide whether we must probe the stack before any space allocation
5243 on this target. It's essentially TARGET_STACK_PROBE except when
5244 -fstack-check causes the stack to be already probed differently. */
5246 bool
5247 ix86_target_stack_probe (void)
5249 /* Do not probe the stack twice if static stack checking is enabled. */
5250 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5251 return false;
5253 return TARGET_STACK_PROBE;
5256 /* Decide whether we can make a sibling call to a function. DECL is the
5257 declaration of the function being targeted by the call and EXP is the
5258 CALL_EXPR representing the call. */
5260 static bool
5261 ix86_function_ok_for_sibcall (tree decl, tree exp)
5263 tree type, decl_or_type;
5264 rtx a, b;
5266 /* If we are generating position-independent code, we cannot sibcall
5267 optimize any indirect call, or a direct call to a global function,
5268 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5269 if (!TARGET_MACHO
5270 && !TARGET_64BIT
5271 && flag_pic
5272 && (!decl || !targetm.binds_local_p (decl)))
5273 return false;
5275 /* If we need to align the outgoing stack, then sibcalling would
5276 unalign the stack, which may break the called function. */
5277 if (ix86_minimum_incoming_stack_boundary (true)
5278 < PREFERRED_STACK_BOUNDARY)
5279 return false;
5281 if (decl)
5283 decl_or_type = decl;
5284 type = TREE_TYPE (decl);
5286 else
5288 /* We're looking at the CALL_EXPR, we need the type of the function. */
5289 type = CALL_EXPR_FN (exp); /* pointer expression */
5290 type = TREE_TYPE (type); /* pointer type */
5291 type = TREE_TYPE (type); /* function type */
5292 decl_or_type = type;
5295 /* Check that the return value locations are the same. Like
5296 if we are returning floats on the 80387 register stack, we cannot
5297 make a sibcall from a function that doesn't return a float to a
5298 function that does or, conversely, from a function that does return
5299 a float to a function that doesn't; the necessary stack adjustment
5300 would not be executed. This is also the place we notice
5301 differences in the return value ABI. Note that it is ok for one
5302 of the functions to have void return type as long as the return
5303 value of the other is passed in a register. */
5304 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5305 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5306 cfun->decl, false);
5307 if (STACK_REG_P (a) || STACK_REG_P (b))
5309 if (!rtx_equal_p (a, b))
5310 return false;
5312 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5314 else if (!rtx_equal_p (a, b))
5315 return false;
5317 if (TARGET_64BIT)
5319 /* The SYSV ABI has more call-clobbered registers;
5320 disallow sibcalls from MS to SYSV. */
5321 if (cfun->machine->call_abi == MS_ABI
5322 && ix86_function_type_abi (type) == SYSV_ABI)
5323 return false;
5325 else
5327 /* If this call is indirect, we'll need to be able to use a
5328 call-clobbered register for the address of the target function.
5329 Make sure that all such registers are not used for passing
5330 parameters. Note that DLLIMPORT functions are indirect. */
5331 if (!decl
5332 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5334 if (ix86_function_regparm (type, NULL) >= 3)
5336 /* ??? Need to count the actual number of registers to be used,
5337 not the possible number of registers. Fix later. */
5338 return false;
5343 /* Otherwise okay. That also includes certain types of indirect calls. */
5344 return true;
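/* A minimal illustration (hypothetical, not from the original sources) of
   the 80387 return-value check above on 32-bit code:

     double produce (void);
     void consume (void) { produce (); }

   A tail jump from consume to produce would leave produce's result on the
   x87 register stack with no caller left to pop it, so the sibcall is
   rejected whenever the two return-value locations involve a stack register
   and differ.  */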
5347 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5348 and "sseregparm" calling convention attributes;
5349 arguments as in struct attribute_spec.handler. */
5351 static tree
5352 ix86_handle_cconv_attribute (tree *node, tree name,
5353 tree args,
5354 int flags ATTRIBUTE_UNUSED,
5355 bool *no_add_attrs)
5357 if (TREE_CODE (*node) != FUNCTION_TYPE
5358 && TREE_CODE (*node) != METHOD_TYPE
5359 && TREE_CODE (*node) != FIELD_DECL
5360 && TREE_CODE (*node) != TYPE_DECL)
5362 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5363 name);
5364 *no_add_attrs = true;
5365 return NULL_TREE;
5368 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5369 if (is_attribute_p ("regparm", name))
5371 tree cst;
5373 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5375 error ("fastcall and regparm attributes are not compatible");
5378 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5380 error ("regparam and thiscall attributes are not compatible");
5383 cst = TREE_VALUE (args);
5384 if (TREE_CODE (cst) != INTEGER_CST)
5386 warning (OPT_Wattributes,
5387 "%qE attribute requires an integer constant argument",
5388 name);
5389 *no_add_attrs = true;
5391 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5393 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5394 name, REGPARM_MAX);
5395 *no_add_attrs = true;
5398 return NULL_TREE;
5401 if (TARGET_64BIT)
5403 /* Do not warn when emulating the MS ABI. */
5404 if ((TREE_CODE (*node) != FUNCTION_TYPE
5405 && TREE_CODE (*node) != METHOD_TYPE)
5406 || ix86_function_type_abi (*node) != MS_ABI)
5407 warning (OPT_Wattributes, "%qE attribute ignored",
5408 name);
5409 *no_add_attrs = true;
5410 return NULL_TREE;
5413 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5414 if (is_attribute_p ("fastcall", name))
5416 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5418 error ("fastcall and cdecl attributes are not compatible");
5420 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5422 error ("fastcall and stdcall attributes are not compatible");
5424 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5426 error ("fastcall and regparm attributes are not compatible");
5428 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5430 error ("fastcall and thiscall attributes are not compatible");
5434 /* Can combine stdcall with fastcall (redundant), regparm and
5435 sseregparm. */
5436 else if (is_attribute_p ("stdcall", name))
5438 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5440 error ("stdcall and cdecl attributes are not compatible");
5442 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5444 error ("stdcall and fastcall attributes are not compatible");
5446 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5448 error ("stdcall and thiscall attributes are not compatible");
5452 /* Can combine cdecl with regparm and sseregparm. */
5453 else if (is_attribute_p ("cdecl", name))
5455 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5457 error ("stdcall and cdecl attributes are not compatible");
5459 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5461 error ("fastcall and cdecl attributes are not compatible");
5463 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5465 error ("cdecl and thiscall attributes are not compatible");
5468 else if (is_attribute_p ("thiscall", name))
5470 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5471 warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
5472 name);
5473 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5475 error ("stdcall and thiscall attributes are not compatible");
5477 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5479 error ("fastcall and thiscall attributes are not compatible");
5481 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5483 error ("cdecl and thiscall attributes are not compatible");
5487 /* Can combine sseregparm with all attributes. */
5489 return NULL_TREE;
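/* Illustrative declarations (hypothetical, not from the original sources)
   exercising the compatibility checks above:

     void __attribute__((stdcall, regparm (2))) ok (int a, int b);
         -- accepted: stdcall combines with regparm
     void __attribute__((fastcall, regparm (2))) bad (int a, int b);
         -- rejected: "fastcall and regparm attributes are not compatible"

   sseregparm, handled implicitly at the end, combines with any of the
   others.  */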
5492 /* The transactional memory builtins are implicitly regparm or fastcall
5493 depending on the ABI. Override the generic do-nothing attribute that
5494 these builtins were declared with, and replace it with one of the two
5495 attributes that we expect elsewhere. */
5497 static tree
5498 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5499 tree args ATTRIBUTE_UNUSED,
5500 int flags, bool *no_add_attrs)
5502 tree alt;
5504 /* In no case do we want to add the placeholder attribute. */
5505 *no_add_attrs = true;
5507 /* The 64-bit ABI is unchanged for transactional memory. */
5508 if (TARGET_64BIT)
5509 return NULL_TREE;
5511 /* ??? Is there a better way to validate 32-bit windows? We have
5512 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5513 if (CHECK_STACK_LIMIT > 0)
5514 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5515 else
5517 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5518 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5520 decl_attributes (node, alt, flags);
5522 return NULL_TREE;
5525 /* This function determines from TYPE the calling-convention. */
5527 unsigned int
5528 ix86_get_callcvt (const_tree type)
5530 unsigned int ret = 0;
5531 bool is_stdarg;
5532 tree attrs;
5534 if (TARGET_64BIT)
5535 return IX86_CALLCVT_CDECL;
5537 attrs = TYPE_ATTRIBUTES (type);
5538 if (attrs != NULL_TREE)
5540 if (lookup_attribute ("cdecl", attrs))
5541 ret |= IX86_CALLCVT_CDECL;
5542 else if (lookup_attribute ("stdcall", attrs))
5543 ret |= IX86_CALLCVT_STDCALL;
5544 else if (lookup_attribute ("fastcall", attrs))
5545 ret |= IX86_CALLCVT_FASTCALL;
5546 else if (lookup_attribute ("thiscall", attrs))
5547 ret |= IX86_CALLCVT_THISCALL;
5549 /* Regparm isn't allowed for thiscall and fastcall. */
5550 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5552 if (lookup_attribute ("regparm", attrs))
5553 ret |= IX86_CALLCVT_REGPARM;
5554 if (lookup_attribute ("sseregparm", attrs))
5555 ret |= IX86_CALLCVT_SSEREGPARM;
5558 if (IX86_BASE_CALLCVT(ret) != 0)
5559 return ret;
5562 is_stdarg = stdarg_p (type);
5563 if (TARGET_RTD && !is_stdarg)
5564 return IX86_CALLCVT_STDCALL | ret;
5566 if (ret != 0
5567 || is_stdarg
5568 || TREE_CODE (type) != METHOD_TYPE
5569 || ix86_function_type_abi (type) != MS_ABI)
5570 return IX86_CALLCVT_CDECL | ret;
5572 return IX86_CALLCVT_THISCALL;
5575 /* Return 0 if the attributes for two types are incompatible, 1 if they
5576 are compatible, and 2 if they are nearly compatible (which causes a
5577 warning to be generated). */
5579 static int
5580 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5582 unsigned int ccvt1, ccvt2;
5584 if (TREE_CODE (type1) != FUNCTION_TYPE
5585 && TREE_CODE (type1) != METHOD_TYPE)
5586 return 1;
5588 ccvt1 = ix86_get_callcvt (type1);
5589 ccvt2 = ix86_get_callcvt (type2);
5590 if (ccvt1 != ccvt2)
5591 return 0;
5592 if (ix86_function_regparm (type1, NULL)
5593 != ix86_function_regparm (type2, NULL))
5594 return 0;
5596 return 1;
5599 /* Return the regparm value for a function with the indicated TYPE and DECL.
5600 DECL may be NULL when calling function indirectly
5601 or considering a libcall. */
5603 static int
5604 ix86_function_regparm (const_tree type, const_tree decl)
5606 tree attr;
5607 int regparm;
5608 unsigned int ccvt;
5610 if (TARGET_64BIT)
5611 return (ix86_function_type_abi (type) == SYSV_ABI
5612 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5613 ccvt = ix86_get_callcvt (type);
5614 regparm = ix86_regparm;
5616 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5618 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5619 if (attr)
5621 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5622 return regparm;
5625 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5626 return 2;
5627 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5628 return 1;
5630 /* Use register calling convention for local functions when possible. */
5631 if (decl
5632 && TREE_CODE (decl) == FUNCTION_DECL
5633 /* Caller and callee must agree on the calling convention, so
5634 checking only this function's optimize attribute here would mean that
5635 with __attribute__((optimize (...))) the caller could use the regparm
5636 convention and the callee not, or vice versa. Instead look at whether
5637 the callee is optimized or not. */
5638 && opt_for_fn (decl, optimize)
5639 && !(profile_flag && !flag_fentry))
5641 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5642 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5643 if (i && i->local && i->can_change_signature)
5645 int local_regparm, globals = 0, regno;
5647 /* Make sure no regparm register is taken by a
5648 fixed register variable. */
5649 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5650 if (fixed_regs[local_regparm])
5651 break;
5653 /* We don't want to use regparm(3) for nested functions as
5654 these use a static chain pointer in the third argument. */
5655 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5656 local_regparm = 2;
5658 /* In 32-bit mode save a register for the split stack. */
5659 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5660 local_regparm = 2;
5662 /* Each fixed register usage increases register pressure,
5663 so fewer registers should be used for argument passing.
5664 This functionality can be overridden by an explicit
5665 regparm value. */
5666 for (regno = AX_REG; regno <= DI_REG; regno++)
5667 if (fixed_regs[regno])
5668 globals++;
5670 local_regparm
5671 = globals < local_regparm ? local_regparm - globals : 0;
5673 if (local_regparm > regparm)
5674 regparm = local_regparm;
5678 return regparm;
5681 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5682 DFmode (2) arguments in SSE registers for a function with the
5683 indicated TYPE and DECL. DECL may be NULL when calling function
5684 indirectly or considering a libcall. Otherwise return 0. */
5686 static int
5687 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5689 gcc_assert (!TARGET_64BIT);
5691 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5692 by the sseregparm attribute. */
5693 if (TARGET_SSEREGPARM
5694 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5696 if (!TARGET_SSE)
5698 if (warn)
5700 if (decl)
5701 error ("calling %qD with attribute sseregparm without "
5702 "SSE/SSE2 enabled", decl);
5703 else
5704 error ("calling %qT with attribute sseregparm without "
5705 "SSE/SSE2 enabled", type);
5707 return 0;
5710 return 2;
5713 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5714 (and DFmode for SSE2) arguments in SSE registers. */
5715 if (decl && TARGET_SSE_MATH && optimize
5716 && !(profile_flag && !flag_fentry))
5718 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5719 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5720 if (i && i->local && i->can_change_signature)
5721 return TARGET_SSE2 ? 2 : 1;
5724 return 0;
5727 /* Return true if EAX is live at the start of the function. Used by
5728 ix86_expand_prologue to determine if we need special help before
5729 calling allocate_stack_worker. */
5731 static bool
5732 ix86_eax_live_at_start_p (void)
5734 /* Cheat. Don't bother working forward from ix86_function_regparm
5735 to the function type to whether an actual argument is located in
5736 eax. Instead just look at cfg info, which is still close enough
5737 to correct at this point. This gives false positives for broken
5738 functions that might use uninitialized data that happens to be
5739 allocated in eax, but who cares? */
5740 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5743 static bool
5744 ix86_keep_aggregate_return_pointer (tree fntype)
5746 tree attr;
5748 if (!TARGET_64BIT)
5750 attr = lookup_attribute ("callee_pop_aggregate_return",
5751 TYPE_ATTRIBUTES (fntype));
5752 if (attr)
5753 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5755 /* For 32-bit MS-ABI the default is to keep aggregate
5756 return pointer. */
5757 if (ix86_function_type_abi (fntype) == MS_ABI)
5758 return true;
5760 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5763 /* Value is the number of bytes of arguments automatically
5764 popped when returning from a subroutine call.
5765 FUNDECL is the declaration node of the function (as a tree),
5766 FUNTYPE is the data type of the function (as a tree),
5767 or for a library call it is an identifier node for the subroutine name.
5768 SIZE is the number of bytes of arguments passed on the stack.
5770 On the 80386, the RTD insn may be used to pop them if the number
5771 of args is fixed, but if the number is variable then the caller
5772 must pop them all. RTD can't be used for library calls now
5773 because the library is compiled with the Unix compiler.
5774 Use of RTD is a selectable option, since it is incompatible with
5775 standard Unix calling sequences. If the option is not selected,
5776 the caller must always pop the args.
5778 The attribute stdcall is equivalent to RTD on a per module basis. */
5780 static int
5781 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5783 unsigned int ccvt;
5785 /* None of the 64-bit ABIs pop arguments. */
5786 if (TARGET_64BIT)
5787 return 0;
5789 ccvt = ix86_get_callcvt (funtype);
5791 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5792 | IX86_CALLCVT_THISCALL)) != 0
5793 && ! stdarg_p (funtype))
5794 return size;
5796 /* Lose any fake structure return argument if it is passed on the stack. */
5797 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5798 && !ix86_keep_aggregate_return_pointer (funtype))
5800 int nregs = ix86_function_regparm (funtype, fundecl);
5801 if (nregs == 0)
5802 return GET_MODE_SIZE (Pmode);
5805 return 0;
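/* A minimal illustration (hypothetical, not from the original sources) of
   the callee-pops rule above for 32-bit code:

     int __attribute__((stdcall)) f (int a, int b);
         -- f returns with "ret $8", popping its 8 bytes of arguments
     int __attribute__((cdecl)) g (int a, int b);
         -- g returns with a plain "ret"; the caller pops

   Variadic functions never pop their arguments, which is why stdarg_p is
   checked above.  */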
5808 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5810 static bool
5811 ix86_legitimate_combined_insn (rtx insn)
5813 /* Check operand constraints in case hard registers were propagated
5814 into insn pattern. This check prevents combine pass from
5815 generating insn patterns with invalid hard register operands.
5816 These invalid insns can eventually confuse reload to error out
5817 with a spill failure. See also PRs 46829 and 46843. */
5818 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5820 int i;
5822 extract_insn (insn);
5823 preprocess_constraints ();
5825 for (i = 0; i < recog_data.n_operands; i++)
5827 rtx op = recog_data.operand[i];
5828 enum machine_mode mode = GET_MODE (op);
5829 struct operand_alternative *op_alt;
5830 int offset = 0;
5831 bool win;
5832 int j;
5834 /* For pre-AVX disallow unaligned loads/stores where the
5835 instructions don't support it. */
5836 if (!TARGET_AVX
5837 && VECTOR_MODE_P (GET_MODE (op))
5838 && misaligned_operand (op, GET_MODE (op)))
5840 int min_align = get_attr_ssememalign (insn);
5841 if (min_align == 0)
5842 return false;
5845 /* A unary operator may be accepted by the predicate, but it
5846 is irrelevant for matching constraints. */
5847 if (UNARY_P (op))
5848 op = XEXP (op, 0);
5850 if (GET_CODE (op) == SUBREG)
5852 if (REG_P (SUBREG_REG (op))
5853 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5854 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5855 GET_MODE (SUBREG_REG (op)),
5856 SUBREG_BYTE (op),
5857 GET_MODE (op));
5858 op = SUBREG_REG (op);
5861 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5862 continue;
5864 op_alt = recog_op_alt[i];
5866 /* Operand has no constraints, anything is OK. */
5867 win = !recog_data.n_alternatives;
5869 for (j = 0; j < recog_data.n_alternatives; j++)
5871 if (op_alt[j].anything_ok
5872 || (op_alt[j].matches != -1
5873 && operands_match_p
5874 (recog_data.operand[i],
5875 recog_data.operand[op_alt[j].matches]))
5876 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5878 win = true;
5879 break;
5883 if (!win)
5884 return false;
5888 return true;
5891 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5893 static unsigned HOST_WIDE_INT
5894 ix86_asan_shadow_offset (void)
5896 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5897 : HOST_WIDE_INT_C (0x7fff8000))
5898 : (HOST_WIDE_INT_1 << 29);
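/* A sketch of how the offset above is consumed (assuming the generic
   AddressSanitizer shadow-memory scheme): each 8 bytes of application
   memory map to one shadow byte at

     shadow_addr = (addr >> 3) + ix86_asan_shadow_offset ()

   so LP64 Linux targets use 0x7fff8000, Mach-O uses 1 << 44, and non-LP64
   targets use 1 << 29.  */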
5901 /* Argument support functions. */
5903 /* Return true when register may be used to pass function parameters. */
5904 bool
5905 ix86_function_arg_regno_p (int regno)
5907 int i;
5908 enum calling_abi call_abi;
5909 const int *parm_regs;
5911 if (!TARGET_64BIT)
5913 if (TARGET_MACHO)
5914 return (regno < REGPARM_MAX
5915 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5916 else
5917 return (regno < REGPARM_MAX
5918 || (TARGET_MMX && MMX_REGNO_P (regno)
5919 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5920 || (TARGET_SSE && SSE_REGNO_P (regno)
5921 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5924 if (TARGET_SSE && SSE_REGNO_P (regno)
5925 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5926 return true;
5928 /* TODO: The function should depend on current function ABI but
5929 builtins.c would need updating then. Therefore we use the
5930 default ABI. */
5931 call_abi = ix86_cfun_abi ();
5933 /* RAX is used as hidden argument to va_arg functions. */
5934 if (call_abi == SYSV_ABI && regno == AX_REG)
5935 return true;
5937 if (call_abi == MS_ABI)
5938 parm_regs = x86_64_ms_abi_int_parameter_registers;
5939 else
5940 parm_regs = x86_64_int_parameter_registers;
5942 for (i = 0; i < (call_abi == MS_ABI
5943 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5944 if (regno == parm_regs[i])
5945 return true;
5946 return false;
5949 /* Return true if we do not know how to pass TYPE solely in registers. */
5951 static bool
5952 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5954 if (must_pass_in_stack_var_size_or_pad (mode, type))
5955 return true;
5957 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5958 The layout_type routine is crafty and tries to trick us into passing
5959 currently unsupported vector types on the stack by using TImode. */
5960 return (!TARGET_64BIT && mode == TImode
5961 && type && TREE_CODE (type) != VECTOR_TYPE);
5964 /* Return the size, in bytes, of the area reserved for arguments passed
5965 in registers for the function represented by FNDECL, depending on the
5966 ABI used. */
5967 int
5968 ix86_reg_parm_stack_space (const_tree fndecl)
5970 enum calling_abi call_abi = SYSV_ABI;
5971 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5972 call_abi = ix86_function_abi (fndecl);
5973 else
5974 call_abi = ix86_function_type_abi (fndecl);
5975 if (TARGET_64BIT && call_abi == MS_ABI)
5976 return 32;
5977 return 0;
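/* The 32 bytes returned above for the 64-bit MS ABI are the "home" (shadow)
   area the caller always reserves for the four register arguments.
   Illustrative caller-side code (hypothetical):

     subq  $40, %rsp      # 32 bytes of shadow space + 8 to keep alignment
     call  callee

   SYSV targets reserve no such area, hence the 0.  */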
5980 /* Return SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
5981 call ABI used. */
5982 enum calling_abi
5983 ix86_function_type_abi (const_tree fntype)
5985 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5987 enum calling_abi abi = ix86_abi;
5988 if (abi == SYSV_ABI)
5990 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5992 if (TARGET_X32)
5994 static bool warned = false;
5995 if (!warned)
5997 error ("X32 does not support ms_abi attribute");
5998 warned = true;
6001 abi = MS_ABI;
6004 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6005 abi = SYSV_ABI;
6006 return abi;
6008 return ix86_abi;
6011 /* We add this as a workaround in order to use libc_has_function
6012 hook in i386.md. */
6013 bool
6014 ix86_libc_has_function (enum function_class fn_class)
6016 return targetm.libc_has_function (fn_class);
6019 static bool
6020 ix86_function_ms_hook_prologue (const_tree fn)
6022 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6024 if (decl_function_context (fn) != NULL_TREE)
6025 error_at (DECL_SOURCE_LOCATION (fn),
6026 "ms_hook_prologue is not compatible with nested function");
6027 else
6028 return true;
6030 return false;
6033 static enum calling_abi
6034 ix86_function_abi (const_tree fndecl)
6036 if (! fndecl)
6037 return ix86_abi;
6038 return ix86_function_type_abi (TREE_TYPE (fndecl));
6041 /* Return SYSV_ABI or MS_ABI depending on CFUN, specifying the
6042 call ABI used. */
6043 enum calling_abi
6044 ix86_cfun_abi (void)
6046 if (! cfun)
6047 return ix86_abi;
6048 return cfun->machine->call_abi;
6051 /* Write the extra assembler code needed to declare a function properly. */
6053 void
6054 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6055 tree decl)
6057 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6059 if (is_ms_hook)
6061 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6062 unsigned int filler_cc = 0xcccccccc;
6064 for (i = 0; i < filler_count; i += 4)
6065 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6068 #ifdef SUBTARGET_ASM_UNWIND_INIT
6069 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6070 #endif
6072 ASM_OUTPUT_LABEL (asm_out_file, fname);
6074 /* Output magic byte marker, if hot-patch attribute is set. */
6075 if (is_ms_hook)
6077 if (TARGET_64BIT)
6079 /* leaq [%rsp + 0], %rsp */
6080 asm_fprintf (asm_out_file, ASM_BYTE
6081 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6083 else
6085 /* movl.s %edi, %edi
6086 push %ebp
6087 movl.s %esp, %ebp */
6088 asm_fprintf (asm_out_file, ASM_BYTE
6089 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6094 /* regclass.c */
6095 extern void init_regs (void);
6097 /* Implementation of the call ABI switching target hook. Set up the call
6098 register sets that are specific to FNDECL. See also
6099 ix86_conditional_register_usage for more details. */
6100 void
6101 ix86_call_abi_override (const_tree fndecl)
6103 if (fndecl == NULL_TREE)
6104 cfun->machine->call_abi = ix86_abi;
6105 else
6106 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6109 /* 64-bit MS and SYSV ABI have different sets of call-used registers. Avoid
6110 expensive re-initialization of init_regs each time we switch function context
6111 since this is needed only during RTL expansion. */
6112 static void
6113 ix86_maybe_switch_abi (void)
6115 if (TARGET_64BIT &&
6116 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6117 reinit_regs ();
6120 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6121 for a call to a function whose data type is FNTYPE.
6122 For a library call, FNTYPE is 0. */
6124 void
6125 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6126 tree fntype, /* tree ptr for function decl */
6127 rtx libname, /* SYMBOL_REF of library name or 0 */
6128 tree fndecl,
6129 int caller)
6131 struct cgraph_local_info *i;
6133 memset (cum, 0, sizeof (*cum));
6135 if (fndecl)
6137 i = cgraph_local_info (fndecl);
6138 cum->call_abi = ix86_function_abi (fndecl);
6140 else
6142 i = NULL;
6143 cum->call_abi = ix86_function_type_abi (fntype);
6146 cum->caller = caller;
6148 /* Set up the number of registers to use for passing arguments. */
6149 cum->nregs = ix86_regparm;
6150 if (TARGET_64BIT)
6152 cum->nregs = (cum->call_abi == SYSV_ABI
6153 ? X86_64_REGPARM_MAX
6154 : X86_64_MS_REGPARM_MAX);
6156 if (TARGET_SSE)
6158 cum->sse_nregs = SSE_REGPARM_MAX;
6159 if (TARGET_64BIT)
6161 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6162 ? X86_64_SSE_REGPARM_MAX
6163 : X86_64_MS_SSE_REGPARM_MAX);
6166 if (TARGET_MMX)
6167 cum->mmx_nregs = MMX_REGPARM_MAX;
6168 cum->warn_avx512f = true;
6169 cum->warn_avx = true;
6170 cum->warn_sse = true;
6171 cum->warn_mmx = true;
6173 /* Because the type might mismatch between caller and callee, we need to
6174 use the actual type of the function for local calls.
6175 FIXME: cgraph_analyze can be told to actually record if function uses
6176 va_start so for local functions maybe_vaarg can be made aggressive
6177 helping K&R code.
6178 FIXME: once the type system is fixed, we won't need this code anymore. */
6179 if (i && i->local && i->can_change_signature)
6180 fntype = TREE_TYPE (fndecl);
6181 cum->maybe_vaarg = (fntype
6182 ? (!prototype_p (fntype) || stdarg_p (fntype))
6183 : !libname);
6185 if (!TARGET_64BIT)
6187 /* If there are variable arguments, then we won't pass anything
6188 in registers in 32-bit mode. */
6189 if (stdarg_p (fntype))
6191 cum->nregs = 0;
6192 cum->sse_nregs = 0;
6193 cum->mmx_nregs = 0;
6194 cum->warn_avx512f = false;
6195 cum->warn_avx = false;
6196 cum->warn_sse = false;
6197 cum->warn_mmx = false;
6198 return;
6201 /* Use ecx and edx registers if function has fastcall attribute,
6202 else look for regparm information. */
6203 if (fntype)
6205 unsigned int ccvt = ix86_get_callcvt (fntype);
6206 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6208 cum->nregs = 1;
6209 cum->fastcall = 1; /* Same first register as in fastcall. */
6211 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6213 cum->nregs = 2;
6214 cum->fastcall = 1;
6216 else
6217 cum->nregs = ix86_function_regparm (fntype, fndecl);
6220 /* Set up the number of SSE registers used for passing SFmode
6221 and DFmode arguments. Warn for mismatching ABI. */
6222 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6226 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6227 But in the case of vector types, it is some vector mode.
6229 When we have only some of our vector isa extensions enabled, then there
6230 are some modes for which vector_mode_supported_p is false. For these
6231 modes, the generic vector support in gcc will choose some non-vector mode
6232 in order to implement the type. By computing the natural mode, we'll
6233 select the proper ABI location for the operand and not depend on whatever
6234 the middle-end decides to do with these vector types.
6236 The middle-end can't deal with vector types > 16 bytes. In this
6237 case, we return the original mode and warn about the ABI change if
6238 CUM isn't NULL.
6240 If IN_RETURN is true, warn about the ABI change if the vector mode
6241 isn't available for the function return value. */
6243 static enum machine_mode
6244 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6245 bool in_return)
6247 enum machine_mode mode = TYPE_MODE (type);
6249 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6251 HOST_WIDE_INT size = int_size_in_bytes (type);
6252 if ((size == 8 || size == 16 || size == 32 || size == 64)
6253 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6254 && TYPE_VECTOR_SUBPARTS (type) > 1)
6256 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6258 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6259 mode = MIN_MODE_VECTOR_FLOAT;
6260 else
6261 mode = MIN_MODE_VECTOR_INT;
6263 /* Get the mode which has this inner mode and number of units. */
6264 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6265 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6266 && GET_MODE_INNER (mode) == innermode)
6268 if (size == 64 && !TARGET_AVX512F)
6270 static bool warnedavx512f;
6271 static bool warnedavx512f_ret;
6273 if (cum && cum->warn_avx512f && !warnedavx512f)
6275 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6276 "without AVX512F enabled changes the ABI"))
6277 warnedavx512f = true;
6279 else if (in_return && !warnedavx512f_ret)
6281 if (warning (OPT_Wpsabi, "AVX512F vector return "
6282 "without AVX512F enabled changes the ABI"))
6283 warnedavx512f_ret = true;
6286 return TYPE_MODE (type);
6288 else if (size == 32 && !TARGET_AVX)
6290 static bool warnedavx;
6291 static bool warnedavx_ret;
6293 if (cum && cum->warn_avx && !warnedavx)
6295 if (warning (OPT_Wpsabi, "AVX vector argument "
6296 "without AVX enabled changes the ABI"))
6297 warnedavx = true;
6299 else if (in_return && !warnedavx_ret)
6301 if (warning (OPT_Wpsabi, "AVX vector return "
6302 "without AVX enabled changes the ABI"))
6303 warnedavx_ret = true;
6306 return TYPE_MODE (type);
6308 else if (((size == 8 && TARGET_64BIT) || size == 16)
6309 && !TARGET_SSE)
6311 static bool warnedsse;
6312 static bool warnedsse_ret;
6314 if (cum && cum->warn_sse && !warnedsse)
6316 if (warning (OPT_Wpsabi, "SSE vector argument "
6317 "without SSE enabled changes the ABI"))
6318 warnedsse = true;
6320 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6322 if (warning (OPT_Wpsabi, "SSE vector return "
6323 "without SSE enabled changes the ABI"))
6324 warnedsse_ret = true;
6327 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6329 static bool warnedmmx;
6330 static bool warnedmmx_ret;
6332 if (cum && cum->warn_mmx && !warnedmmx)
6334 if (warning (OPT_Wpsabi, "MMX vector argument "
6335 "without MMX enabled changes the ABI"))
6336 warnedmmx = true;
6338 else if (in_return && !warnedmmx_ret)
6340 if (warning (OPT_Wpsabi, "MMX vector return "
6341 "without MMX enabled changes the ABI"))
6342 warnedmmx_ret = true;
6345 return mode;
6348 gcc_unreachable ();
6352 return mode;
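/* An illustrative trigger (hypothetical, not from the original sources) for
   the -Wpsabi warnings above -- compiling the following for x86-64 without
   -mavx:

     typedef double v4df __attribute__((vector_size (32)));
     void take (v4df v);

   warns "AVX vector argument without AVX enabled changes the ABI" at the
   point where the argument is classified, since the 32-byte vector can no
   longer be passed in a YMM register.  */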
6355 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6356 this may not agree with the mode that the type system has chosen for the
6357 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6358 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6360 static rtx
6361 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6362 unsigned int regno)
6364 rtx tmp;
6366 if (orig_mode != BLKmode)
6367 tmp = gen_rtx_REG (orig_mode, regno);
6368 else
6370 tmp = gen_rtx_REG (mode, regno);
6371 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6372 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6375 return tmp;
6378 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6379 of this code is to classify each 8bytes of incoming argument by the register
6380 class and assign registers accordingly. */
6382 /* Return the union class of CLASS1 and CLASS2.
6383 See the x86-64 PS ABI for details. */
6385 static enum x86_64_reg_class
6386 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6388 /* Rule #1: If both classes are equal, this is the resulting class. */
6389 if (class1 == class2)
6390 return class1;
6392 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6393 the other class. */
6394 if (class1 == X86_64_NO_CLASS)
6395 return class2;
6396 if (class2 == X86_64_NO_CLASS)
6397 return class1;
6399 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6400 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6401 return X86_64_MEMORY_CLASS;
6403 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6404 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6405 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6406 return X86_64_INTEGERSI_CLASS;
6407 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6408 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6409 return X86_64_INTEGER_CLASS;
6411 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6412 MEMORY is used. */
6413 if (class1 == X86_64_X87_CLASS
6414 || class1 == X86_64_X87UP_CLASS
6415 || class1 == X86_64_COMPLEX_X87_CLASS
6416 || class2 == X86_64_X87_CLASS
6417 || class2 == X86_64_X87UP_CLASS
6418 || class2 == X86_64_COMPLEX_X87_CLASS)
6419 return X86_64_MEMORY_CLASS;
6421 /* Rule #6: Otherwise class SSE is used. */
6422 return X86_64_SSE_CLASS;
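/* An illustrative application (a sketch following the x86-64 psABI, not
   from the original sources) of the merge rules above.  For

     struct s { int a; int b; double d; };

   the first eightbyte (a and b) classifies as INTEGER and the second (d) as
   SSE, so the struct is passed in one general-purpose register and one SSE
   register rather than in memory.  */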
6425 /* Classify the argument of type TYPE and mode MODE.
6426 CLASSES will be filled by the register class used to pass each word
6427 of the operand. The number of words is returned. In case the parameter
6428 should be passed in memory, 0 is returned. As a special case for zero
6429 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6431 BIT_OFFSET is used internally for handling records and specifies the
6432 offset in bits modulo 512 to avoid overflow cases.
6434 See the x86-64 PS ABI for details.
6437 static int
6438 classify_argument (enum machine_mode mode, const_tree type,
6439 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6441 HOST_WIDE_INT bytes =
6442 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6443 int words
6444 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6446 /* Variable sized entities are always passed/returned in memory. */
6447 if (bytes < 0)
6448 return 0;
6450 if (mode != VOIDmode
6451 && targetm.calls.must_pass_in_stack (mode, type))
6452 return 0;
6454 if (type && AGGREGATE_TYPE_P (type))
6456 int i;
6457 tree field;
6458 enum x86_64_reg_class subclasses[MAX_CLASSES];
6460 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6461 if (bytes > 64)
6462 return 0;
6464 for (i = 0; i < words; i++)
6465 classes[i] = X86_64_NO_CLASS;
6467 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6468 signal the memory class, so handle it as a special case. */
6469 if (!words)
6471 classes[0] = X86_64_NO_CLASS;
6472 return 1;
6475 /* Classify each field of record and merge classes. */
6476 switch (TREE_CODE (type))
6478 case RECORD_TYPE:
6479 /* And now merge the fields of structure. */
6480 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6482 if (TREE_CODE (field) == FIELD_DECL)
6484 int num;
6486 if (TREE_TYPE (field) == error_mark_node)
6487 continue;
6489 /* Bitfields are always classified as integer. Handle them
6490 early, since later code would consider them to be
6491 misaligned integers. */
6492 if (DECL_BIT_FIELD (field))
6494 for (i = (int_bit_position (field)
6495 + (bit_offset % 64)) / 8 / 8;
6496 i < ((int_bit_position (field) + (bit_offset % 64))
6497 + tree_to_shwi (DECL_SIZE (field))
6498 + 63) / 8 / 8; i++)
6499 classes[i] =
6500 merge_classes (X86_64_INTEGER_CLASS,
6501 classes[i]);
6503 else
6505 int pos;
6507 type = TREE_TYPE (field);
6509 /* Flexible array member is ignored. */
6510 if (TYPE_MODE (type) == BLKmode
6511 && TREE_CODE (type) == ARRAY_TYPE
6512 && TYPE_SIZE (type) == NULL_TREE
6513 && TYPE_DOMAIN (type) != NULL_TREE
6514 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6515 == NULL_TREE))
6517 static bool warned;
6519 if (!warned && warn_psabi)
6521 warned = true;
6522 inform (input_location,
6523 "the ABI of passing struct with"
6524 " a flexible array member has"
6525 " changed in GCC 4.4");
6527 continue;
6529 num = classify_argument (TYPE_MODE (type), type,
6530 subclasses,
6531 (int_bit_position (field)
6532 + bit_offset) % 512);
6533 if (!num)
6534 return 0;
6535 pos = (int_bit_position (field)
6536 + (bit_offset % 64)) / 8 / 8;
6537 for (i = 0; i < num && (i + pos) < words; i++)
6538 classes[i + pos] =
6539 merge_classes (subclasses[i], classes[i + pos]);
6543 break;
6545 case ARRAY_TYPE:
6546 /* Arrays are handled as small records. */
6548 int num;
6549 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6550 TREE_TYPE (type), subclasses, bit_offset);
6551 if (!num)
6552 return 0;
6554 /* The partial classes are now full classes. */
6555 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6556 subclasses[0] = X86_64_SSE_CLASS;
6557 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6558 && !((bit_offset % 64) == 0 && bytes == 4))
6559 subclasses[0] = X86_64_INTEGER_CLASS;
6561 for (i = 0; i < words; i++)
6562 classes[i] = subclasses[i % num];
6564 break;
6566 case UNION_TYPE:
6567 case QUAL_UNION_TYPE:
6568 /* Unions are similar to RECORD_TYPE but offset is always 0. */
6570 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6572 if (TREE_CODE (field) == FIELD_DECL)
6574 int num;
6576 if (TREE_TYPE (field) == error_mark_node)
6577 continue;
6579 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6580 TREE_TYPE (field), subclasses,
6581 bit_offset);
6582 if (!num)
6583 return 0;
6584 for (i = 0; i < num && i < words; i++)
6585 classes[i] = merge_classes (subclasses[i], classes[i]);
6588 break;
6590 default:
6591 gcc_unreachable ();
6594 if (words > 2)
6596 /* When size > 16 bytes, if the first eightbyte isn't
6597 X86_64_SSE_CLASS or any of the others isn't
6598 X86_64_SSEUP_CLASS, everything should be passed in
6599 memory. */
6600 if (classes[0] != X86_64_SSE_CLASS)
6601 return 0;
6603 for (i = 1; i < words; i++)
6604 if (classes[i] != X86_64_SSEUP_CLASS)
6605 return 0;
6608 /* Final merger cleanup. */
6609 for (i = 0; i < words; i++)
6611 /* If one class is MEMORY, everything should be passed in
6612 memory. */
6613 if (classes[i] == X86_64_MEMORY_CLASS)
6614 return 0;
6616 /* The X86_64_SSEUP_CLASS should be always preceded by
6617 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6618 if (classes[i] == X86_64_SSEUP_CLASS
6619 && classes[i - 1] != X86_64_SSE_CLASS
6620 && classes[i - 1] != X86_64_SSEUP_CLASS)
6622 /* The first one should never be X86_64_SSEUP_CLASS. */
6623 gcc_assert (i != 0);
6624 classes[i] = X86_64_SSE_CLASS;
6627 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6628 everything should be passed in memory. */
6629 if (classes[i] == X86_64_X87UP_CLASS
6630 && (classes[i - 1] != X86_64_X87_CLASS))
6632 static bool warned;
6634 /* The first one should never be X86_64_X87UP_CLASS. */
6635 gcc_assert (i != 0);
6636 if (!warned && warn_psabi)
6638 warned = true;
6639 inform (input_location,
6640 "the ABI of passing union with long double"
6641 " has changed in GCC 4.4");
6643 return 0;
6646 return words;
6649 /* Compute alignment needed. We align all types to natural boundaries with
6650 the exception of XFmode, which is aligned to 64 bits. */
6651 if (mode != VOIDmode && mode != BLKmode)
6653 int mode_alignment = GET_MODE_BITSIZE (mode);
6655 if (mode == XFmode)
6656 mode_alignment = 128;
6657 else if (mode == XCmode)
6658 mode_alignment = 256;
6659 if (COMPLEX_MODE_P (mode))
6660 mode_alignment /= 2;
6661 /* Misaligned fields are always returned in memory. */
6662 if (bit_offset % mode_alignment)
6663 return 0;
6666 /* for V1xx modes, just use the base mode */
6667 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6668 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6669 mode = GET_MODE_INNER (mode);
6671 /* Classification of atomic types. */
6672 switch (mode)
6674 case SDmode:
6675 case DDmode:
6676 classes[0] = X86_64_SSE_CLASS;
6677 return 1;
6678 case TDmode:
6679 classes[0] = X86_64_SSE_CLASS;
6680 classes[1] = X86_64_SSEUP_CLASS;
6681 return 2;
6682 case DImode:
6683 case SImode:
6684 case HImode:
6685 case QImode:
6686 case CSImode:
6687 case CHImode:
6688 case CQImode:
6690 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6692 /* Analyze last 128 bits only. */
6693 size = (size - 1) & 0x7f;
6695 if (size < 32)
6697 classes[0] = X86_64_INTEGERSI_CLASS;
6698 return 1;
6700 else if (size < 64)
6702 classes[0] = X86_64_INTEGER_CLASS;
6703 return 1;
6705 else if (size < 64+32)
6707 classes[0] = X86_64_INTEGER_CLASS;
6708 classes[1] = X86_64_INTEGERSI_CLASS;
6709 return 2;
6711 else if (size < 64+64)
6713 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6714 return 2;
6716 else
6717 gcc_unreachable ();
6719 case CDImode:
6720 case TImode:
6721 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6722 return 2;
6723 case COImode:
6724 case OImode:
6725 /* OImode shouldn't be used directly. */
6726 gcc_unreachable ();
6727 case CTImode:
6728 return 0;
6729 case SFmode:
6730 if (!(bit_offset % 64))
6731 classes[0] = X86_64_SSESF_CLASS;
6732 else
6733 classes[0] = X86_64_SSE_CLASS;
6734 return 1;
6735 case DFmode:
6736 classes[0] = X86_64_SSEDF_CLASS;
6737 return 1;
6738 case XFmode:
6739 classes[0] = X86_64_X87_CLASS;
6740 classes[1] = X86_64_X87UP_CLASS;
6741 return 2;
6742 case TFmode:
6743 classes[0] = X86_64_SSE_CLASS;
6744 classes[1] = X86_64_SSEUP_CLASS;
6745 return 2;
6746 case SCmode:
6747 classes[0] = X86_64_SSE_CLASS;
6748 if (!(bit_offset % 64))
6749 return 1;
6750 else
6752 static bool warned;
6754 if (!warned && warn_psabi)
6756 warned = true;
6757 inform (input_location,
6758 "the ABI of passing structure with complex float"
6759 " member has changed in GCC 4.4");
6761 classes[1] = X86_64_SSESF_CLASS;
6762 return 2;
6764 case DCmode:
6765 classes[0] = X86_64_SSEDF_CLASS;
6766 classes[1] = X86_64_SSEDF_CLASS;
6767 return 2;
6768 case XCmode:
6769 classes[0] = X86_64_COMPLEX_X87_CLASS;
6770 return 1;
6771 case TCmode:
6772 /* This mode is larger than 16 bytes. */
6773 return 0;
6774 case V8SFmode:
6775 case V8SImode:
6776 case V32QImode:
6777 case V16HImode:
6778 case V4DFmode:
6779 case V4DImode:
6780 classes[0] = X86_64_SSE_CLASS;
6781 classes[1] = X86_64_SSEUP_CLASS;
6782 classes[2] = X86_64_SSEUP_CLASS;
6783 classes[3] = X86_64_SSEUP_CLASS;
6784 return 4;
6785 case V8DFmode:
6786 case V16SFmode:
6787 case V8DImode:
6788 case V16SImode:
6789 case V32HImode:
6790 case V64QImode:
6791 classes[0] = X86_64_SSE_CLASS;
6792 classes[1] = X86_64_SSEUP_CLASS;
6793 classes[2] = X86_64_SSEUP_CLASS;
6794 classes[3] = X86_64_SSEUP_CLASS;
6795 classes[4] = X86_64_SSEUP_CLASS;
6796 classes[5] = X86_64_SSEUP_CLASS;
6797 classes[6] = X86_64_SSEUP_CLASS;
6798 classes[7] = X86_64_SSEUP_CLASS;
6799 return 8;
6800 case V4SFmode:
6801 case V4SImode:
6802 case V16QImode:
6803 case V8HImode:
6804 case V2DFmode:
6805 case V2DImode:
6806 classes[0] = X86_64_SSE_CLASS;
6807 classes[1] = X86_64_SSEUP_CLASS;
6808 return 2;
6809 case V1TImode:
6810 case V1DImode:
6811 case V2SFmode:
6812 case V2SImode:
6813 case V4HImode:
6814 case V8QImode:
6815 classes[0] = X86_64_SSE_CLASS;
6816 return 1;
6817 case BLKmode:
6818 case VOIDmode:
6819 return 0;
6820 default:
6821 gcc_assert (VECTOR_MODE_P (mode));
6823 if (bytes > 16)
6824 return 0;
6826 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6828 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6829 classes[0] = X86_64_INTEGERSI_CLASS;
6830 else
6831 classes[0] = X86_64_INTEGER_CLASS;
6832 classes[1] = X86_64_INTEGER_CLASS;
6833 return 1 + (bytes > 8);
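/* Illustrative note (editorial sketch, not part of the original source):
   two worked examples of the 64-bit SysV classification above, assuming
   free registers remain:

       struct a { double d; int i; };    16 bytes, two eightbytes
       struct b { long x, y, z; };       24 bytes, three eightbytes

   For struct a, the first eightbyte gets an SSE class (from the double)
   and the second an integer class (from the int), so the struct travels
   in one XMM register and one GPR.  For struct b, the size exceeds 16
   bytes and the classes are not SSE followed by SSEUP, so
   classify_argument returns 0 and the struct is passed in memory.  */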
6837 /* Examine the argument and set the number of registers required in each
6838 class. Return 0 iff the parameter should be passed in memory. */
6839 static int
6840 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6841 int *int_nregs, int *sse_nregs)
6843 enum x86_64_reg_class regclass[MAX_CLASSES];
6844 int n = classify_argument (mode, type, regclass, 0);
6846 *int_nregs = 0;
6847 *sse_nregs = 0;
6848 if (!n)
6849 return 0;
6850 for (n--; n >= 0; n--)
6851 switch (regclass[n])
6853 case X86_64_INTEGER_CLASS:
6854 case X86_64_INTEGERSI_CLASS:
6855 (*int_nregs)++;
6856 break;
6857 case X86_64_SSE_CLASS:
6858 case X86_64_SSESF_CLASS:
6859 case X86_64_SSEDF_CLASS:
6860 (*sse_nregs)++;
6861 break;
6862 case X86_64_NO_CLASS:
6863 case X86_64_SSEUP_CLASS:
6864 break;
6865 case X86_64_X87_CLASS:
6866 case X86_64_X87UP_CLASS:
6867 if (!in_return)
6868 return 0;
6869 break;
6870 case X86_64_COMPLEX_X87_CLASS:
6871 return in_return ? 2 : 0;
6872 case X86_64_MEMORY_CLASS:
6873 gcc_unreachable ();
6875 return 1;
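/* Illustrative note (editorial, not part of the original source): for the
   struct { double d; int i; } example above, classify_argument yields one
   SSE class and one integer class, so examine_argument reports
   int_nregs == 1 and sse_nregs == 1; callers then verify that enough GPRs
   and XMM registers remain before assigning them.  */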
6878 /* Construct container for the argument used by GCC interface. See
6879 FUNCTION_ARG for the detailed description. */
6881 static rtx
6882 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6883 const_tree type, int in_return, int nintregs, int nsseregs,
6884 const int *intreg, int sse_regno)
6886 /* The following variables hold the static issued_error state. */
6887 static bool issued_sse_arg_error;
6888 static bool issued_sse_ret_error;
6889 static bool issued_x87_ret_error;
6891 enum machine_mode tmpmode;
6892 int bytes =
6893 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6894 enum x86_64_reg_class regclass[MAX_CLASSES];
6895 int n;
6896 int i;
6897 int nexps = 0;
6898 int needed_sseregs, needed_intregs;
6899 rtx exp[MAX_CLASSES];
6900 rtx ret;
6902 n = classify_argument (mode, type, regclass, 0);
6903 if (!n)
6904 return NULL;
6905 if (!examine_argument (mode, type, in_return, &needed_intregs,
6906 &needed_sseregs))
6907 return NULL;
6908 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6909 return NULL;
6911 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6912 some less clueful developer tries to use floating-point anyway. */
6913 if (needed_sseregs && !TARGET_SSE)
6915 if (in_return)
6917 if (!issued_sse_ret_error)
6919 error ("SSE register return with SSE disabled");
6920 issued_sse_ret_error = true;
6923 else if (!issued_sse_arg_error)
6925 error ("SSE register argument with SSE disabled");
6926 issued_sse_arg_error = true;
6928 return NULL;
6931 /* Likewise, error if the ABI requires us to return values in the
6932 x87 registers and the user specified -mno-80387. */
6933 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6934 for (i = 0; i < n; i++)
6935 if (regclass[i] == X86_64_X87_CLASS
6936 || regclass[i] == X86_64_X87UP_CLASS
6937 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6939 if (!issued_x87_ret_error)
6941 error ("x87 register return with x87 disabled");
6942 issued_x87_ret_error = true;
6944 return NULL;
6947 /* First construct simple cases. Avoid SCmode, since we want to use
6948 a single register to pass this type. */
6949 if (n == 1 && mode != SCmode)
6950 switch (regclass[0])
6952 case X86_64_INTEGER_CLASS:
6953 case X86_64_INTEGERSI_CLASS:
6954 return gen_rtx_REG (mode, intreg[0]);
6955 case X86_64_SSE_CLASS:
6956 case X86_64_SSESF_CLASS:
6957 case X86_64_SSEDF_CLASS:
6958 if (mode != BLKmode)
6959 return gen_reg_or_parallel (mode, orig_mode,
6960 SSE_REGNO (sse_regno));
6961 break;
6962 case X86_64_X87_CLASS:
6963 case X86_64_COMPLEX_X87_CLASS:
6964 return gen_rtx_REG (mode, FIRST_STACK_REG);
6965 case X86_64_NO_CLASS:
6966 /* Zero sized array, struct or class. */
6967 return NULL;
6968 default:
6969 gcc_unreachable ();
6971 if (n == 2
6972 && regclass[0] == X86_64_SSE_CLASS
6973 && regclass[1] == X86_64_SSEUP_CLASS
6974 && mode != BLKmode)
6975 return gen_reg_or_parallel (mode, orig_mode,
6976 SSE_REGNO (sse_regno));
6977 if (n == 4
6978 && regclass[0] == X86_64_SSE_CLASS
6979 && regclass[1] == X86_64_SSEUP_CLASS
6980 && regclass[2] == X86_64_SSEUP_CLASS
6981 && regclass[3] == X86_64_SSEUP_CLASS
6982 && mode != BLKmode)
6983 return gen_reg_or_parallel (mode, orig_mode,
6984 SSE_REGNO (sse_regno));
6985 if (n == 8
6986 && regclass[0] == X86_64_SSE_CLASS
6987 && regclass[1] == X86_64_SSEUP_CLASS
6988 && regclass[2] == X86_64_SSEUP_CLASS
6989 && regclass[3] == X86_64_SSEUP_CLASS
6990 && regclass[4] == X86_64_SSEUP_CLASS
6991 && regclass[5] == X86_64_SSEUP_CLASS
6992 && regclass[6] == X86_64_SSEUP_CLASS
6993 && regclass[7] == X86_64_SSEUP_CLASS
6994 && mode != BLKmode)
6995 return gen_reg_or_parallel (mode, orig_mode,
6996 SSE_REGNO (sse_regno));
6997 if (n == 2
6998 && regclass[0] == X86_64_X87_CLASS
6999 && regclass[1] == X86_64_X87UP_CLASS)
7000 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7002 if (n == 2
7003 && regclass[0] == X86_64_INTEGER_CLASS
7004 && regclass[1] == X86_64_INTEGER_CLASS
7005 && (mode == CDImode || mode == TImode)
7006 && intreg[0] + 1 == intreg[1])
7007 return gen_rtx_REG (mode, intreg[0]);
7009 /* Otherwise figure out the entries of the PARALLEL. */
7010 for (i = 0; i < n; i++)
7012 int pos;
7014 switch (regclass[i])
7016 case X86_64_NO_CLASS:
7017 break;
7018 case X86_64_INTEGER_CLASS:
7019 case X86_64_INTEGERSI_CLASS:
7020 /* Merge TImodes on aligned occasions here too. */
7021 if (i * 8 + 8 > bytes)
7022 tmpmode
7023 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7024 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7025 tmpmode = SImode;
7026 else
7027 tmpmode = DImode;
7028 /* We've requested a number of bytes we
7029 don't have a mode for. Use DImode. */
7030 if (tmpmode == BLKmode)
7031 tmpmode = DImode;
7032 exp [nexps++]
7033 = gen_rtx_EXPR_LIST (VOIDmode,
7034 gen_rtx_REG (tmpmode, *intreg),
7035 GEN_INT (i*8));
7036 intreg++;
7037 break;
7038 case X86_64_SSESF_CLASS:
7039 exp [nexps++]
7040 = gen_rtx_EXPR_LIST (VOIDmode,
7041 gen_rtx_REG (SFmode,
7042 SSE_REGNO (sse_regno)),
7043 GEN_INT (i*8));
7044 sse_regno++;
7045 break;
7046 case X86_64_SSEDF_CLASS:
7047 exp [nexps++]
7048 = gen_rtx_EXPR_LIST (VOIDmode,
7049 gen_rtx_REG (DFmode,
7050 SSE_REGNO (sse_regno)),
7051 GEN_INT (i*8));
7052 sse_regno++;
7053 break;
7054 case X86_64_SSE_CLASS:
7055 pos = i;
7056 switch (n)
7058 case 1:
7059 tmpmode = DImode;
7060 break;
7061 case 2:
7062 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7064 tmpmode = TImode;
7065 i++;
7067 else
7068 tmpmode = DImode;
7069 break;
7070 case 4:
7071 gcc_assert (i == 0
7072 && regclass[1] == X86_64_SSEUP_CLASS
7073 && regclass[2] == X86_64_SSEUP_CLASS
7074 && regclass[3] == X86_64_SSEUP_CLASS);
7075 tmpmode = OImode;
7076 i += 3;
7077 break;
7078 case 8:
7079 gcc_assert (i == 0
7080 && regclass[1] == X86_64_SSEUP_CLASS
7081 && regclass[2] == X86_64_SSEUP_CLASS
7082 && regclass[3] == X86_64_SSEUP_CLASS
7083 && regclass[4] == X86_64_SSEUP_CLASS
7084 && regclass[5] == X86_64_SSEUP_CLASS
7085 && regclass[6] == X86_64_SSEUP_CLASS
7086 && regclass[7] == X86_64_SSEUP_CLASS);
7087 tmpmode = XImode;
7088 i += 7;
7089 break;
7090 default:
7091 gcc_unreachable ();
7093 exp [nexps++]
7094 = gen_rtx_EXPR_LIST (VOIDmode,
7095 gen_rtx_REG (tmpmode,
7096 SSE_REGNO (sse_regno)),
7097 GEN_INT (pos*8));
7098 sse_regno++;
7099 break;
7100 default:
7101 gcc_unreachable ();
7105 /* Empty aligned struct, union or class. */
7106 if (nexps == 0)
7107 return NULL;
7109 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7110 for (i = 0; i < nexps; i++)
7111 XVECEXP (ret, 0, i) = exp [i];
7112 return ret;
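/* Editorial sketch (not part of the original source): for the
   struct { double d; int i; } example, none of the simple cases above
   match, so construct_container builds a two-entry PARALLEL, roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rdi) (const_int 8))])

   i.e. the first eightbyte travels in an XMM register and the second in
   the next free integer register; the exact registers depend on how many
   were consumed by earlier arguments.  */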
7115 /* Update the data in CUM to advance over an argument of mode MODE
7116 and data type TYPE. (TYPE is null for libcalls where that information
7117 may not be available.) */
7119 static void
7120 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7121 const_tree type, HOST_WIDE_INT bytes,
7122 HOST_WIDE_INT words)
7124 switch (mode)
7126 default:
7127 break;
7129 case BLKmode:
7130 if (bytes < 0)
7131 break;
7132 /* FALLTHRU */
7134 case DImode:
7135 case SImode:
7136 case HImode:
7137 case QImode:
7138 cum->words += words;
7139 cum->nregs -= words;
7140 cum->regno += words;
7142 if (cum->nregs <= 0)
7144 cum->nregs = 0;
7145 cum->regno = 0;
7147 break;
7149 case OImode:
7150 /* OImode shouldn't be used directly. */
7151 gcc_unreachable ();
7153 case DFmode:
7154 if (cum->float_in_sse < 2)
7155 break;
7156 case SFmode:
7157 if (cum->float_in_sse < 1)
7158 break;
7159 /* FALLTHRU */
7161 case V8SFmode:
7162 case V8SImode:
7163 case V64QImode:
7164 case V32HImode:
7165 case V16SImode:
7166 case V8DImode:
7167 case V16SFmode:
7168 case V8DFmode:
7169 case V32QImode:
7170 case V16HImode:
7171 case V4DFmode:
7172 case V4DImode:
7173 case TImode:
7174 case V16QImode:
7175 case V8HImode:
7176 case V4SImode:
7177 case V2DImode:
7178 case V4SFmode:
7179 case V2DFmode:
7180 if (!type || !AGGREGATE_TYPE_P (type))
7182 cum->sse_words += words;
7183 cum->sse_nregs -= 1;
7184 cum->sse_regno += 1;
7185 if (cum->sse_nregs <= 0)
7187 cum->sse_nregs = 0;
7188 cum->sse_regno = 0;
7191 break;
7193 case V8QImode:
7194 case V4HImode:
7195 case V2SImode:
7196 case V2SFmode:
7197 case V1TImode:
7198 case V1DImode:
7199 if (!type || !AGGREGATE_TYPE_P (type))
7201 cum->mmx_words += words;
7202 cum->mmx_nregs -= 1;
7203 cum->mmx_regno += 1;
7204 if (cum->mmx_nregs <= 0)
7206 cum->mmx_nregs = 0;
7207 cum->mmx_regno = 0;
7210 break;
7214 static void
7215 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7216 const_tree type, HOST_WIDE_INT words, bool named)
7218 int int_nregs, sse_nregs;
7220 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7221 if (!named && (VALID_AVX512F_REG_MODE (mode)
7222 || VALID_AVX256_REG_MODE (mode)))
7223 return;
7225 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7226 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7228 cum->nregs -= int_nregs;
7229 cum->sse_nregs -= sse_nregs;
7230 cum->regno += int_nregs;
7231 cum->sse_regno += sse_nregs;
7233 else
7235 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7236 cum->words = (cum->words + align - 1) & ~(align - 1);
7237 cum->words += words;
7241 static void
7242 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7243 HOST_WIDE_INT words)
7245 /* Otherwise, this should be passed indirect. */
7246 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7248 cum->words += words;
7249 if (cum->nregs > 0)
7251 cum->nregs -= 1;
7252 cum->regno += 1;
7256 /* Update the data in CUM to advance over an argument of mode MODE and
7257 data type TYPE. (TYPE is null for libcalls where that information
7258 may not be available.) */
7260 static void
7261 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7262 const_tree type, bool named)
7264 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7265 HOST_WIDE_INT bytes, words;
7267 if (mode == BLKmode)
7268 bytes = int_size_in_bytes (type);
7269 else
7270 bytes = GET_MODE_SIZE (mode);
7271 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7273 if (type)
7274 mode = type_natural_mode (type, NULL, false);
7276 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7277 function_arg_advance_ms_64 (cum, bytes, words);
7278 else if (TARGET_64BIT)
7279 function_arg_advance_64 (cum, mode, type, words, named);
7280 else
7281 function_arg_advance_32 (cum, mode, type, bytes, words);
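/* Illustrative note (editorial, not part of the original source): on the
   64-bit SysV ABI, advancing over the named arguments of

       void f (int a, double b, __m128 c);

   consumes one GPR for a (rdi) and one SSE register each for b and c
   (xmm0, xmm1), leaving cum->regno == 1 and cum->sse_regno == 2 for any
   arguments that follow.  */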
7284 /* Define where to put the arguments to a function.
7285 Value is zero to push the argument on the stack,
7286 or a hard register in which to store the argument.
7288 MODE is the argument's machine mode.
7289 TYPE is the data type of the argument (as a tree).
7290 This is null for libcalls where that information may
7291 not be available.
7292 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7293 the preceding args and about the function being called.
7294 NAMED is nonzero if this argument is a named parameter
7295 (otherwise it is an extra parameter matching an ellipsis). */
7297 static rtx
7298 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7299 enum machine_mode orig_mode, const_tree type,
7300 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7302 /* Avoid the AL settings for the Unix64 ABI. */
7303 if (mode == VOIDmode)
7304 return constm1_rtx;
7306 switch (mode)
7308 default:
7309 break;
7311 case BLKmode:
7312 if (bytes < 0)
7313 break;
7314 /* FALLTHRU */
7315 case DImode:
7316 case SImode:
7317 case HImode:
7318 case QImode:
7319 if (words <= cum->nregs)
7321 int regno = cum->regno;
7323 /* Fastcall allocates the first two DWORD (SImode) or
7324 smaller arguments to ECX and EDX if it isn't an
7325 aggregate type. */
7326 if (cum->fastcall)
7328 if (mode == BLKmode
7329 || mode == DImode
7330 || (type && AGGREGATE_TYPE_P (type)))
7331 break;
7333 /* ECX not EAX is the first allocated register. */
7334 if (regno == AX_REG)
7335 regno = CX_REG;
7337 return gen_rtx_REG (mode, regno);
7339 break;
7341 case DFmode:
7342 if (cum->float_in_sse < 2)
7343 break;
7344 case SFmode:
7345 if (cum->float_in_sse < 1)
7346 break;
7347 /* FALLTHRU */
7348 case TImode:
7349 /* In 32bit, we pass TImode in xmm registers. */
7350 case V16QImode:
7351 case V8HImode:
7352 case V4SImode:
7353 case V2DImode:
7354 case V4SFmode:
7355 case V2DFmode:
7356 if (!type || !AGGREGATE_TYPE_P (type))
7358 if (cum->sse_nregs)
7359 return gen_reg_or_parallel (mode, orig_mode,
7360 cum->sse_regno + FIRST_SSE_REG);
7362 break;
7364 case OImode:
7365 case XImode:
7366 /* OImode and XImode shouldn't be used directly. */
7367 gcc_unreachable ();
7369 case V64QImode:
7370 case V32HImode:
7371 case V16SImode:
7372 case V8DImode:
7373 case V16SFmode:
7374 case V8DFmode:
7375 case V8SFmode:
7376 case V8SImode:
7377 case V32QImode:
7378 case V16HImode:
7379 case V4DFmode:
7380 case V4DImode:
7381 if (!type || !AGGREGATE_TYPE_P (type))
7383 if (cum->sse_nregs)
7384 return gen_reg_or_parallel (mode, orig_mode,
7385 cum->sse_regno + FIRST_SSE_REG);
7387 break;
7389 case V8QImode:
7390 case V4HImode:
7391 case V2SImode:
7392 case V2SFmode:
7393 case V1TImode:
7394 case V1DImode:
7395 if (!type || !AGGREGATE_TYPE_P (type))
7397 if (cum->mmx_nregs)
7398 return gen_reg_or_parallel (mode, orig_mode,
7399 cum->mmx_regno + FIRST_MMX_REG);
7401 break;
7404 return NULL_RTX;
7407 static rtx
7408 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7409 enum machine_mode orig_mode, const_tree type, bool named)
7411 /* Handle a hidden AL argument containing number of registers
7412 for varargs x86-64 functions. */
7413 if (mode == VOIDmode)
7414 return GEN_INT (cum->maybe_vaarg
7415 ? (cum->sse_nregs < 0
7416 ? X86_64_SSE_REGPARM_MAX
7417 : cum->sse_regno)
7418 : -1);
7420 switch (mode)
7422 default:
7423 break;
7425 case V8SFmode:
7426 case V8SImode:
7427 case V32QImode:
7428 case V16HImode:
7429 case V4DFmode:
7430 case V4DImode:
7431 case V16SFmode:
7432 case V16SImode:
7433 case V64QImode:
7434 case V32HImode:
7435 case V8DFmode:
7436 case V8DImode:
7437 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7438 if (!named)
7439 return NULL;
7440 break;
7443 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7444 cum->sse_nregs,
7445 &x86_64_int_parameter_registers [cum->regno],
7446 cum->sse_regno);
7449 static rtx
7450 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7451 enum machine_mode orig_mode, bool named,
7452 HOST_WIDE_INT bytes)
7454 unsigned int regno;
7456 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7457 We use the value -2 to specify that the current function call is MS ABI. */
7458 if (mode == VOIDmode)
7459 return GEN_INT (-2);
7461 /* If we've run out of registers, it goes on the stack. */
7462 if (cum->nregs == 0)
7463 return NULL_RTX;
7465 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7467 /* Only floating point modes are passed in anything but integer regs. */
7468 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7470 if (named)
7471 regno = cum->regno + FIRST_SSE_REG;
7472 else
7474 rtx t1, t2;
7476 /* Unnamed floating parameters are passed in both the
7477 SSE and integer registers. */
7478 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7479 t2 = gen_rtx_REG (mode, regno);
7480 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7481 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7482 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7485 /* Handle aggregate types passed in registers. */
7486 if (orig_mode == BLKmode)
7488 if (bytes > 0 && bytes <= 8)
7489 mode = (bytes > 4 ? DImode : SImode);
7490 if (mode == BLKmode)
7491 mode = DImode;
7494 return gen_reg_or_parallel (mode, orig_mode, regno);
7497 /* Return where to put the arguments to a function.
7498 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7500 MODE is the argument's machine mode. TYPE is the data type of the
7501 argument. It is null for libcalls where that information may not be
7502 available. CUM gives information about the preceding args and about
7503 the function being called. NAMED is nonzero if this argument is a
7504 named parameter (otherwise it is an extra parameter matching an
7505 ellipsis). */
7507 static rtx
7508 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7509 const_tree type, bool named)
7511 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7512 enum machine_mode mode = omode;
7513 HOST_WIDE_INT bytes, words;
7514 rtx arg;
7516 if (mode == BLKmode)
7517 bytes = int_size_in_bytes (type);
7518 else
7519 bytes = GET_MODE_SIZE (mode);
7520 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7522 /* To simplify the code below, represent vector types with a vector mode
7523 even if MMX/SSE are not active. */
7524 if (type && TREE_CODE (type) == VECTOR_TYPE)
7525 mode = type_natural_mode (type, cum, false);
7527 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7528 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7529 else if (TARGET_64BIT)
7530 arg = function_arg_64 (cum, mode, omode, type, named);
7531 else
7532 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7534 return arg;
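/* Illustrative summary (editorial, not part of the original source): the
   net effect of the helpers above is the usual register assignment:
   64-bit SysV passes integer arguments in rdi, rsi, rdx, rcx, r8, r9 and
   floating-point or vector arguments in xmm0-xmm7, with the remainder on
   the stack; the 64-bit MS ABI uses the first four argument slots in
   rcx, rdx, r8, r9 (or xmm0-xmm3 for SFmode/DFmode); 32-bit code passes
   arguments on the stack except for regparm/fastcall values in
   EAX/EDX/ECX and vector values in XMM or MMX registers when enabled.  */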
7537 /* A C expression that indicates when an argument must be passed by
7538 reference. If nonzero for an argument, a copy of that argument is
7539 made in memory and a pointer to the argument is passed instead of
7540 the argument itself. The pointer is passed in whatever way is
7541 appropriate for passing a pointer to that type. */
7543 static bool
7544 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7545 const_tree type, bool named ATTRIBUTE_UNUSED)
7547 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7549 /* See Windows x64 Software Convention. */
7550 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7552 int msize = (int) GET_MODE_SIZE (mode);
7553 if (type)
7555 /* Arrays are passed by reference. */
7556 if (TREE_CODE (type) == ARRAY_TYPE)
7557 return true;
7559 if (AGGREGATE_TYPE_P (type))
7561 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7562 are passed by reference. */
7563 msize = int_size_in_bytes (type);
7567 /* __m128 is passed by reference. */
7568 switch (msize) {
7569 case 1: case 2: case 4: case 8:
7570 break;
7571 default:
7572 return true;
7575 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7576 return 1;
7578 return 0;
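/* Illustrative note (editorial, not part of the original source): under
   the MS ABI rules above an array parameter, a 12-byte struct or an
   __m128 value is passed by reference (a pointer occupies the argument
   slot), while a struct of exactly 1, 2, 4 or 8 bytes is passed by
   value.  On the 64-bit SysV ABI only variable-sized types take the
   by-reference path here.  */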
7581 /* Return true when TYPE should be 128bit aligned for 32bit argument
7582 passing ABI. XXX: This function is obsolete and is only used for
7583 checking psABI compatibility with previous versions of GCC. */
7585 static bool
7586 ix86_compat_aligned_value_p (const_tree type)
7588 enum machine_mode mode = TYPE_MODE (type);
7589 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7590 || mode == TDmode
7591 || mode == TFmode
7592 || mode == TCmode)
7593 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7594 return true;
7595 if (TYPE_ALIGN (type) < 128)
7596 return false;
7598 if (AGGREGATE_TYPE_P (type))
7600 /* Walk the aggregates recursively. */
7601 switch (TREE_CODE (type))
7603 case RECORD_TYPE:
7604 case UNION_TYPE:
7605 case QUAL_UNION_TYPE:
7607 tree field;
7609 /* Walk all the structure fields. */
7610 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7612 if (TREE_CODE (field) == FIELD_DECL
7613 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7614 return true;
7616 break;
7619 case ARRAY_TYPE:
7620 /* Just for use if some languages pass arrays by value. */
7621 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7622 return true;
7623 break;
7625 default:
7626 gcc_unreachable ();
7629 return false;
7632 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7633 XXX: This function is obsolete and is only used for checking psABI
7634 compatibility with previous versions of GCC. */
7636 static unsigned int
7637 ix86_compat_function_arg_boundary (enum machine_mode mode,
7638 const_tree type, unsigned int align)
7640 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7641 natural boundaries. */
7642 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7644 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7645 make an exception for SSE modes since these require 128bit
7646 alignment.
7648 The handling here differs from field_alignment. ICC aligns MMX
7649 arguments to 4 byte boundaries, while structure fields are aligned
7650 to 8 byte boundaries. */
7651 if (!type)
7653 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7654 align = PARM_BOUNDARY;
7656 else
7658 if (!ix86_compat_aligned_value_p (type))
7659 align = PARM_BOUNDARY;
7662 if (align > BIGGEST_ALIGNMENT)
7663 align = BIGGEST_ALIGNMENT;
7664 return align;
7667 /* Return true when TYPE should be 128bit aligned for 32bit argument
7668 passing ABI. */
7670 static bool
7671 ix86_contains_aligned_value_p (const_tree type)
7673 enum machine_mode mode = TYPE_MODE (type);
7675 if (mode == XFmode || mode == XCmode)
7676 return false;
7678 if (TYPE_ALIGN (type) < 128)
7679 return false;
7681 if (AGGREGATE_TYPE_P (type))
7683 /* Walk the aggregates recursively. */
7684 switch (TREE_CODE (type))
7686 case RECORD_TYPE:
7687 case UNION_TYPE:
7688 case QUAL_UNION_TYPE:
7690 tree field;
7692 /* Walk all the structure fields. */
7693 for (field = TYPE_FIELDS (type);
7694 field;
7695 field = DECL_CHAIN (field))
7697 if (TREE_CODE (field) == FIELD_DECL
7698 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7699 return true;
7701 break;
7704 case ARRAY_TYPE:
7705 /* Just for use if some languages pass arrays by value. */
7706 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7707 return true;
7708 break;
7710 default:
7711 gcc_unreachable ();
7714 else
7715 return TYPE_ALIGN (type) >= 128;
7717 return false;
7720 /* Gives the alignment boundary, in bits, of an argument with the
7721 specified mode and type. */
7723 static unsigned int
7724 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7726 unsigned int align;
7727 if (type)
7729 /* Since the main variant type is used for the call, convert the type
7730 to its main variant. */
7731 type = TYPE_MAIN_VARIANT (type);
7732 align = TYPE_ALIGN (type);
7734 else
7735 align = GET_MODE_ALIGNMENT (mode);
7736 if (align < PARM_BOUNDARY)
7737 align = PARM_BOUNDARY;
7738 else
7740 static bool warned;
7741 unsigned int saved_align = align;
7743 if (!TARGET_64BIT)
7745 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7746 if (!type)
7748 if (mode == XFmode || mode == XCmode)
7749 align = PARM_BOUNDARY;
7751 else if (!ix86_contains_aligned_value_p (type))
7752 align = PARM_BOUNDARY;
7754 if (align < 128)
7755 align = PARM_BOUNDARY;
7758 if (warn_psabi
7759 && !warned
7760 && align != ix86_compat_function_arg_boundary (mode, type,
7761 saved_align))
7763 warned = true;
7764 inform (input_location,
7765 "The ABI for passing parameters with %d-byte"
7766 " alignment has changed in GCC 4.6",
7767 align / BITS_PER_UNIT);
7771 return align;
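/* Illustrative note (editorial, not part of the original source): on the
   32-bit ABI a double argument is pushed with only 4-byte alignment,
   while a type containing a 128-bit aligned value (an __m128 field,
   _Decimal128 or __float128) raises the boundary to 128 bits; on the
   64-bit ABI the boundary is simply the type alignment, never below the
   64-bit PARM_BOUNDARY.  */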
7774 /* Return true if N is a possible register number of function value. */
7776 static bool
7777 ix86_function_value_regno_p (const unsigned int regno)
7779 switch (regno)
7781 case AX_REG:
7782 case DX_REG:
7783 return true;
7784 case DI_REG:
7785 case SI_REG:
7786 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
7788 /* Complex values are returned in %st(0)/%st(1) pair. */
7789 case ST0_REG:
7790 case ST1_REG:
7791 /* TODO: The function should depend on current function ABI but
7792 builtins.c would need updating then. Therefore we use the
7793 default ABI. */
7794 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
7795 return false;
7796 return TARGET_FLOAT_RETURNS_IN_80387;
7798 /* Complex values are returned in %xmm0/%xmm1 pair. */
7799 case XMM0_REG:
7800 case XMM1_REG:
7801 return TARGET_SSE;
7803 case MM0_REG:
7804 if (TARGET_MACHO || TARGET_64BIT)
7805 return false;
7806 return TARGET_MMX;
7809 return false;
7812 /* Define how to find the value returned by a function.
7813 VALTYPE is the data type of the value (as a tree).
7814 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7815 otherwise, FUNC is 0. */
7817 static rtx
7818 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7819 const_tree fntype, const_tree fn)
7821 unsigned int regno;
7823 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7824 we normally prevent this case when mmx is not available. However
7825 some ABIs may require the result to be returned like DImode. */
7826 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7827 regno = FIRST_MMX_REG;
7829 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7830 we prevent this case when sse is not available. However some ABIs
7831 may require the result to be returned like integer TImode. */
7832 else if (mode == TImode
7833 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7834 regno = FIRST_SSE_REG;
7836 /* 32-byte vector modes in %ymm0. */
7837 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7838 regno = FIRST_SSE_REG;
7840 /* 64-byte vector modes in %zmm0. */
7841 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7842 regno = FIRST_SSE_REG;
7844 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7845 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7846 regno = FIRST_FLOAT_REG;
7847 else
7848 /* Most things go in %eax. */
7849 regno = AX_REG;
7851 /* Override FP return register with %xmm0 for local functions when
7852 SSE math is enabled or for functions with sseregparm attribute. */
7853 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7855 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7856 if ((sse_level >= 1 && mode == SFmode)
7857 || (sse_level == 2 && mode == DFmode))
7858 regno = FIRST_SSE_REG;
7861 /* OImode shouldn't be used directly. */
7862 gcc_assert (mode != OImode);
7864 return gen_rtx_REG (orig_mode, regno);
7867 static rtx
7868 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7869 const_tree valtype)
7871 rtx ret;
7873 /* Handle libcalls, which don't provide a type node. */
7874 if (valtype == NULL)
7876 unsigned int regno;
7878 switch (mode)
7880 case SFmode:
7881 case SCmode:
7882 case DFmode:
7883 case DCmode:
7884 case TFmode:
7885 case SDmode:
7886 case DDmode:
7887 case TDmode:
7888 regno = FIRST_SSE_REG;
7889 break;
7890 case XFmode:
7891 case XCmode:
7892 regno = FIRST_FLOAT_REG;
7893 break;
7894 case TCmode:
7895 return NULL;
7896 default:
7897 regno = AX_REG;
7900 return gen_rtx_REG (mode, regno);
7902 else if (POINTER_TYPE_P (valtype))
7904 /* Pointers are always returned in word_mode. */
7905 mode = word_mode;
7908 ret = construct_container (mode, orig_mode, valtype, 1,
7909 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7910 x86_64_int_return_registers, 0);
7912 /* For zero sized structures, construct_container returns NULL, but we
7913 need to keep the rest of the compiler happy by returning a meaningful value. */
7914 if (!ret)
7915 ret = gen_rtx_REG (orig_mode, AX_REG);
7917 return ret;
7920 static rtx
7921 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7922 const_tree valtype)
7924 unsigned int regno = AX_REG;
7926 if (TARGET_SSE)
7928 switch (GET_MODE_SIZE (mode))
7930 case 16:
7931 if (valtype != NULL_TREE
7932 && !VECTOR_INTEGER_TYPE_P (valtype)
7934 && !INTEGRAL_TYPE_P (valtype)
7935 && !VECTOR_FLOAT_TYPE_P (valtype))
7936 break;
7937 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7938 && !COMPLEX_MODE_P (mode))
7939 regno = FIRST_SSE_REG;
7940 break;
7941 case 8:
7942 case 4:
7943 if (mode == SFmode || mode == DFmode)
7944 regno = FIRST_SSE_REG;
7945 break;
7946 default:
7947 break;
7950 return gen_rtx_REG (orig_mode, regno);
7953 static rtx
7954 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7955 enum machine_mode orig_mode, enum machine_mode mode)
7957 const_tree fn, fntype;
7959 fn = NULL_TREE;
7960 if (fntype_or_decl && DECL_P (fntype_or_decl))
7961 fn = fntype_or_decl;
7962 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7964 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7965 return function_value_ms_64 (orig_mode, mode, valtype);
7966 else if (TARGET_64BIT)
7967 return function_value_64 (orig_mode, mode, valtype);
7968 else
7969 return function_value_32 (orig_mode, mode, fntype, fn);
7972 static rtx
7973 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7974 bool outgoing ATTRIBUTE_UNUSED)
7976 enum machine_mode mode, orig_mode;
7978 orig_mode = TYPE_MODE (valtype);
7979 mode = type_natural_mode (valtype, NULL, true);
7980 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
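/* Illustrative note (editorial, not part of the original source): typical
   results of the return-value routines above on the 64-bit SysV ABI are
   integers and small integer structs in rax (and rdx), float, double and
   __m128 in xmm0, long double in st(0), and a struct such as
   { double d; int i; } in an xmm0/rax pair, mirroring the argument
   classification; the MS ABI returns only in rax or xmm0.  */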
7983 /* Pointer function arguments and return values are promoted to
7984 word_mode. */
7986 static enum machine_mode
7987 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7988 int *punsignedp, const_tree fntype,
7989 int for_return)
7991 if (type != NULL_TREE && POINTER_TYPE_P (type))
7993 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7994 return word_mode;
7996 return default_promote_function_mode (type, mode, punsignedp, fntype,
7997 for_return);
8000 /* Return true if a structure, union or array with MODE containing FIELD
8001 should be accessed using BLKmode. */
8003 static bool
8004 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8006 /* Union with XFmode must be in BLKmode. */
8007 return (mode == XFmode
8008 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8009 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8013 ix86_libcall_value (enum machine_mode mode)
8015 return ix86_function_value_1 (NULL, NULL, mode, mode);
8018 /* Return true iff type is returned in memory. */
8020 static bool ATTRIBUTE_UNUSED
8021 return_in_memory_32 (const_tree type, enum machine_mode mode)
8023 HOST_WIDE_INT size;
8025 if (mode == BLKmode)
8026 return true;
8028 size = int_size_in_bytes (type);
8030 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8031 return false;
8033 if (VECTOR_MODE_P (mode) || mode == TImode)
8035 /* User-created vectors small enough to fit in EAX. */
8036 if (size < 8)
8037 return false;
8039 /* MMX/3dNow values are returned in MM0,
8040 except when it doesn't exist or the ABI prescribes otherwise. */
8041 if (size == 8)
8042 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8044 /* SSE values are returned in XMM0, except when it doesn't exist. */
8045 if (size == 16)
8046 return !TARGET_SSE;
8048 /* AVX values are returned in YMM0, except when it doesn't exist. */
8049 if (size == 32)
8050 return !TARGET_AVX;
8052 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8053 if (size == 64)
8054 return !TARGET_AVX512F;
8057 if (mode == XFmode)
8058 return false;
8060 if (size > 12)
8061 return true;
8063 /* OImode shouldn't be used directly. */
8064 gcc_assert (mode != OImode);
8066 return false;
8069 static bool ATTRIBUTE_UNUSED
8070 return_in_memory_64 (const_tree type, enum machine_mode mode)
8072 int needed_intregs, needed_sseregs;
8073 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8076 static bool ATTRIBUTE_UNUSED
8077 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8079 HOST_WIDE_INT size = int_size_in_bytes (type);
8081 /* __m128 is returned in xmm0. */
8082 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8083 || VECTOR_FLOAT_TYPE_P (type))
8084 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8085 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8086 return false;
8088 /* Otherwise, the size must be exactly in [1248]. */
8089 return size != 1 && size != 2 && size != 4 && size != 8;
8092 static bool
8093 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8095 #ifdef SUBTARGET_RETURN_IN_MEMORY
8096 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8097 #else
8098 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8100 if (TARGET_64BIT)
8102 if (ix86_function_type_abi (fntype) == MS_ABI)
8103 return return_in_memory_ms_64 (type, mode);
8104 else
8105 return return_in_memory_64 (type, mode);
8107 else
8108 return return_in_memory_32 (type, mode);
8109 #endif
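/* Illustrative note (editorial, not part of the original source): on the
   64-bit SysV ABI a 24-byte struct of three longs fails classification
   and is therefore returned through a hidden pointer, while a 16-byte
   struct that classifies into registers is not; on the 32-bit ABI an
   8-byte vector is returned in mm0 only when MMX is enabled and
   TARGET_VECT8_RETURNS is not in effect.  */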
8113 /* Create the va_list data type. */
8115 /* Returns the calling convention specific va_list data type.
8116 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8118 static tree
8119 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8121 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8123 /* For i386 we use plain pointer to argument area. */
8124 if (!TARGET_64BIT || abi == MS_ABI)
8125 return build_pointer_type (char_type_node);
8127 record = lang_hooks.types.make_type (RECORD_TYPE);
8128 type_decl = build_decl (BUILTINS_LOCATION,
8129 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8131 f_gpr = build_decl (BUILTINS_LOCATION,
8132 FIELD_DECL, get_identifier ("gp_offset"),
8133 unsigned_type_node);
8134 f_fpr = build_decl (BUILTINS_LOCATION,
8135 FIELD_DECL, get_identifier ("fp_offset"),
8136 unsigned_type_node);
8137 f_ovf = build_decl (BUILTINS_LOCATION,
8138 FIELD_DECL, get_identifier ("overflow_arg_area"),
8139 ptr_type_node);
8140 f_sav = build_decl (BUILTINS_LOCATION,
8141 FIELD_DECL, get_identifier ("reg_save_area"),
8142 ptr_type_node);
8144 va_list_gpr_counter_field = f_gpr;
8145 va_list_fpr_counter_field = f_fpr;
8147 DECL_FIELD_CONTEXT (f_gpr) = record;
8148 DECL_FIELD_CONTEXT (f_fpr) = record;
8149 DECL_FIELD_CONTEXT (f_ovf) = record;
8150 DECL_FIELD_CONTEXT (f_sav) = record;
8152 TYPE_STUB_DECL (record) = type_decl;
8153 TYPE_NAME (record) = type_decl;
8154 TYPE_FIELDS (record) = f_gpr;
8155 DECL_CHAIN (f_gpr) = f_fpr;
8156 DECL_CHAIN (f_fpr) = f_ovf;
8157 DECL_CHAIN (f_ovf) = f_sav;
8159 layout_type (record);
8161 /* The correct type is an array type of one element. */
8162 return build_array_type (record, build_index_type (size_zero_node));
8165 /* Setup the builtin va_list data type and for 64-bit the additional
8166 calling convention specific va_list data types. */
8168 static tree
8169 ix86_build_builtin_va_list (void)
8171 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8173 /* Initialize abi specific va_list builtin types. */
8174 if (TARGET_64BIT)
8176 tree t;
8177 if (ix86_abi == MS_ABI)
8179 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8180 if (TREE_CODE (t) != RECORD_TYPE)
8181 t = build_variant_type_copy (t);
8182 sysv_va_list_type_node = t;
8184 else
8186 t = ret;
8187 if (TREE_CODE (t) != RECORD_TYPE)
8188 t = build_variant_type_copy (t);
8189 sysv_va_list_type_node = t;
8191 if (ix86_abi != MS_ABI)
8193 t = ix86_build_builtin_va_list_abi (MS_ABI);
8194 if (TREE_CODE (t) != RECORD_TYPE)
8195 t = build_variant_type_copy (t);
8196 ms_va_list_type_node = t;
8198 else
8200 t = ret;
8201 if (TREE_CODE (t) != RECORD_TYPE)
8202 t = build_variant_type_copy (t);
8203 ms_va_list_type_node = t;
8207 return ret;
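/* Editorial sketch (not part of the original source): the record built
   above corresponds to the user-visible SysV x86-64 va_list, roughly

       struct __va_list_tag
       {
         unsigned int gp_offset;      byte offset into reg_save_area, 0..48
         unsigned int fp_offset;      byte offset into reg_save_area, 48..176
         void *overflow_arg_area;     stack-passed arguments
         void *reg_save_area;         6 GPR slots followed by 8 XMM slots
       };
       typedef struct __va_list_tag va_list[1];

   while the MS ABI and 32-bit va_list is a plain character pointer.  */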
8210 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8212 static void
8213 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8215 rtx save_area, mem;
8216 alias_set_type set;
8217 int i, max;
8219 /* GPR size of varargs save area. */
8220 if (cfun->va_list_gpr_size)
8221 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8222 else
8223 ix86_varargs_gpr_size = 0;
8225 /* FPR size of varargs save area. We don't need it if we don't pass
8226 anything in SSE registers. */
8227 if (TARGET_SSE && cfun->va_list_fpr_size)
8228 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8229 else
8230 ix86_varargs_fpr_size = 0;
8232 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8233 return;
8235 save_area = frame_pointer_rtx;
8236 set = get_varargs_alias_set ();
8238 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8239 if (max > X86_64_REGPARM_MAX)
8240 max = X86_64_REGPARM_MAX;
8242 for (i = cum->regno; i < max; i++)
8244 mem = gen_rtx_MEM (word_mode,
8245 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8246 MEM_NOTRAP_P (mem) = 1;
8247 set_mem_alias_set (mem, set);
8248 emit_move_insn (mem,
8249 gen_rtx_REG (word_mode,
8250 x86_64_int_parameter_registers[i]));
8253 if (ix86_varargs_fpr_size)
8255 enum machine_mode smode;
8256 rtx label, test;
8258 /* Now emit code to save SSE registers. The AX parameter contains number
8259 of SSE parameter registers used to call this function, though all we
8260 actually check here is the zero/non-zero status. */
8262 label = gen_label_rtx ();
8263 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8264 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8265 label));
8267 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8268 we used movdqa (i.e. TImode) instead? Perhaps even better would
8269 be if we could determine the real mode of the data, via a hook
8270 into pass_stdarg. Ignore all that for now. */
8271 smode = V4SFmode;
8272 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8273 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8275 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8276 if (max > X86_64_SSE_REGPARM_MAX)
8277 max = X86_64_SSE_REGPARM_MAX;
8279 for (i = cum->sse_regno; i < max; ++i)
8281 mem = plus_constant (Pmode, save_area,
8282 i * 16 + ix86_varargs_gpr_size);
8283 mem = gen_rtx_MEM (smode, mem);
8284 MEM_NOTRAP_P (mem) = 1;
8285 set_mem_alias_set (mem, set);
8286 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8288 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8291 emit_label (label);
8295 static void
8296 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8298 alias_set_type set = get_varargs_alias_set ();
8299 int i;
8301 /* Reset to zero, as there might be a sysv vaarg used
8302 before. */
8303 ix86_varargs_gpr_size = 0;
8304 ix86_varargs_fpr_size = 0;
8306 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8308 rtx reg, mem;
8310 mem = gen_rtx_MEM (Pmode,
8311 plus_constant (Pmode, virtual_incoming_args_rtx,
8312 i * UNITS_PER_WORD));
8313 MEM_NOTRAP_P (mem) = 1;
8314 set_mem_alias_set (mem, set);
8316 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8317 emit_move_insn (mem, reg);
8321 static void
8322 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8323 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8324 int no_rtl)
8326 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8327 CUMULATIVE_ARGS next_cum;
8328 tree fntype;
8330 /* This argument doesn't appear to be used anymore. Which is good,
8331 because the old code here didn't suppress rtl generation. */
8332 gcc_assert (!no_rtl);
8334 if (!TARGET_64BIT)
8335 return;
8337 fntype = TREE_TYPE (current_function_decl);
8339 /* For varargs, we do not want to skip the dummy va_dcl argument.
8340 For stdargs, we do want to skip the last named argument. */
8341 next_cum = *cum;
8342 if (stdarg_p (fntype))
8343 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8344 true);
8346 if (cum->call_abi == MS_ABI)
8347 setup_incoming_varargs_ms_64 (&next_cum);
8348 else
8349 setup_incoming_varargs_64 (&next_cum);
8352 /* Check whether TYPE is a va_list of the plain char * kind. */
8354 static bool
8355 is_va_list_char_pointer (tree type)
8357 tree canonic;
8359 /* For 32-bit it is always true. */
8360 if (!TARGET_64BIT)
8361 return true;
8362 canonic = ix86_canonical_va_list_type (type);
8363 return (canonic == ms_va_list_type_node
8364 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8367 /* Implement va_start. */
8369 static void
8370 ix86_va_start (tree valist, rtx nextarg)
8372 HOST_WIDE_INT words, n_gpr, n_fpr;
8373 tree f_gpr, f_fpr, f_ovf, f_sav;
8374 tree gpr, fpr, ovf, sav, t;
8375 tree type;
8376 rtx ovf_rtx;
8378 if (flag_split_stack
8379 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8381 unsigned int scratch_regno;
8383 /* When we are splitting the stack, we can't refer to the stack
8384 arguments using internal_arg_pointer, because they may be on
8385 the old stack. The split stack prologue will arrange to
8386 leave a pointer to the old stack arguments in a scratch
8387 register, which we here copy to a pseudo-register. The split
8388 stack prologue can't set the pseudo-register directly because
8389 it (the prologue) runs before any registers have been saved. */
8391 scratch_regno = split_stack_prologue_scratch_regno ();
8392 if (scratch_regno != INVALID_REGNUM)
8394 rtx reg, seq;
8396 reg = gen_reg_rtx (Pmode);
8397 cfun->machine->split_stack_varargs_pointer = reg;
8399 start_sequence ();
8400 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8401 seq = get_insns ();
8402 end_sequence ();
8404 push_topmost_sequence ();
8405 emit_insn_after (seq, entry_of_function ());
8406 pop_topmost_sequence ();
8410 /* Only 64bit target needs something special. */
8411 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8413 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8414 std_expand_builtin_va_start (valist, nextarg);
8415 else
8417 rtx va_r, next;
8419 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8420 next = expand_binop (ptr_mode, add_optab,
8421 cfun->machine->split_stack_varargs_pointer,
8422 crtl->args.arg_offset_rtx,
8423 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8424 convert_move (va_r, next, 0);
8426 return;
8429 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8430 f_fpr = DECL_CHAIN (f_gpr);
8431 f_ovf = DECL_CHAIN (f_fpr);
8432 f_sav = DECL_CHAIN (f_ovf);
8434 valist = build_simple_mem_ref (valist);
8435 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8436 /* The following should be folded into the MEM_REF offset. */
8437 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8438 f_gpr, NULL_TREE);
8439 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8440 f_fpr, NULL_TREE);
8441 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8442 f_ovf, NULL_TREE);
8443 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8444 f_sav, NULL_TREE);
8446 /* Count number of gp and fp argument registers used. */
8447 words = crtl->args.info.words;
8448 n_gpr = crtl->args.info.regno;
8449 n_fpr = crtl->args.info.sse_regno;
8451 if (cfun->va_list_gpr_size)
8453 type = TREE_TYPE (gpr);
8454 t = build2 (MODIFY_EXPR, type,
8455 gpr, build_int_cst (type, n_gpr * 8));
8456 TREE_SIDE_EFFECTS (t) = 1;
8457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8460 if (TARGET_SSE && cfun->va_list_fpr_size)
8462 type = TREE_TYPE (fpr);
8463 t = build2 (MODIFY_EXPR, type, fpr,
8464 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8465 TREE_SIDE_EFFECTS (t) = 1;
8466 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8469 /* Find the overflow area. */
8470 type = TREE_TYPE (ovf);
8471 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8472 ovf_rtx = crtl->args.internal_arg_pointer;
8473 else
8474 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8475 t = make_tree (type, ovf_rtx);
8476 if (words != 0)
8477 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8478 t = build2 (MODIFY_EXPR, type, ovf, t);
8479 TREE_SIDE_EFFECTS (t) = 1;
8480 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8482 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8484 /* Find the register save area.
8485 The function prologue saves it right above the stack frame. */
8486 type = TREE_TYPE (sav);
8487 t = make_tree (type, frame_pointer_rtx);
8488 if (!ix86_varargs_gpr_size)
8489 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8490 t = build2 (MODIFY_EXPR, type, sav, t);
8491 TREE_SIDE_EFFECTS (t) = 1;
8492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
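/* Illustrative note (editorial, not part of the original source): together
   with setup_incoming_varargs_64 above this gives the usual SysV layout:
   the register save area holds the six integer argument registers at
   byte offsets 0..40 followed by xmm0-xmm7 in 16-byte slots starting at
   offset 48, and the hidden value in AL tells the prologue whether any
   SSE registers need saving at all.  va_start then seeds gp_offset with
   8 * (GPRs used by named arguments), fp_offset with 48 + 16 * (XMM
   registers used by named arguments), and points overflow_arg_area at
   the first stack-passed argument.  */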
8496 /* Implement va_arg. */
8498 static tree
8499 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8500 gimple_seq *post_p)
8502 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8503 tree f_gpr, f_fpr, f_ovf, f_sav;
8504 tree gpr, fpr, ovf, sav, t;
8505 int size, rsize;
8506 tree lab_false, lab_over = NULL_TREE;
8507 tree addr, t2;
8508 rtx container;
8509 int indirect_p = 0;
8510 tree ptrtype;
8511 enum machine_mode nat_mode;
8512 unsigned int arg_boundary;
8514 /* Only 64bit target needs something special. */
8515 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8516 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8518 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8519 f_fpr = DECL_CHAIN (f_gpr);
8520 f_ovf = DECL_CHAIN (f_fpr);
8521 f_sav = DECL_CHAIN (f_ovf);
8523 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8524 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8525 valist = build_va_arg_indirect_ref (valist);
8526 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8527 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8528 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8530 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8531 if (indirect_p)
8532 type = build_pointer_type (type);
8533 size = int_size_in_bytes (type);
8534 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8536 nat_mode = type_natural_mode (type, NULL, false);
8537 switch (nat_mode)
8539 case V8SFmode:
8540 case V8SImode:
8541 case V32QImode:
8542 case V16HImode:
8543 case V4DFmode:
8544 case V4DImode:
8545 case V16SFmode:
8546 case V16SImode:
8547 case V64QImode:
8548 case V32HImode:
8549 case V8DFmode:
8550 case V8DImode:
8551 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8552 if (!TARGET_64BIT_MS_ABI)
8554 container = NULL;
8555 break;
8558 default:
8559 container = construct_container (nat_mode, TYPE_MODE (type),
8560 type, 0, X86_64_REGPARM_MAX,
8561 X86_64_SSE_REGPARM_MAX, intreg,
8563 break;
8566 /* Pull the value out of the saved registers. */
8568 addr = create_tmp_var (ptr_type_node, "addr");
8570 if (container)
8572 int needed_intregs, needed_sseregs;
8573 bool need_temp;
8574 tree int_addr, sse_addr;
8576 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8577 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8579 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
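/* A temporary is needed when the value is not already a single register and its alignment exceeds what the register save area slots guarantee.  */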
8581 need_temp = (!REG_P (container)
8582 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8583 || TYPE_ALIGN (type) > 128));
8585 /* In case we are passing a structure, verify that it is a consecutive block
8586 in the register save area. If not, we need to do moves. */
8587 if (!need_temp && !REG_P (container))
8589 /* Verify that all registers are strictly consecutive. */
8590 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8592 int i;
8594 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8596 rtx slot = XVECEXP (container, 0, i);
8597 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8598 || INTVAL (XEXP (slot, 1)) != i * 16)
8599 need_temp = 1;
8602 else
8604 int i;
8606 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8608 rtx slot = XVECEXP (container, 0, i);
8609 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8610 || INTVAL (XEXP (slot, 1)) != i * 8)
8611 need_temp = 1;
8615 if (!need_temp)
8617 int_addr = addr;
8618 sse_addr = addr;
8620 else
8622 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8623 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8626 /* First ensure that we fit completely in registers. */
8627 if (needed_intregs)
8629 t = build_int_cst (TREE_TYPE (gpr),
8630 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8631 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8632 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8633 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8634 gimplify_and_add (t, pre_p);
8636 if (needed_sseregs)
8638 t = build_int_cst (TREE_TYPE (fpr),
8639 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8640 + X86_64_REGPARM_MAX * 8);
8641 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8642 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8643 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8644 gimplify_and_add (t, pre_p);
8647 /* Compute index to start of area used for integer regs. */
8648 if (needed_intregs)
8650 /* int_addr = gpr + sav; */
8651 t = fold_build_pointer_plus (sav, gpr);
8652 gimplify_assign (int_addr, t, pre_p);
8654 if (needed_sseregs)
8656 /* sse_addr = fpr + sav; */
8657 t = fold_build_pointer_plus (sav, fpr);
8658 gimplify_assign (sse_addr, t, pre_p);
8660 if (need_temp)
8662 int i, prev_size = 0;
8663 tree temp = create_tmp_var (type, "va_arg_tmp");
8665 /* addr = &temp; */
8666 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8667 gimplify_assign (addr, t, pre_p);
8669 for (i = 0; i < XVECLEN (container, 0); i++)
8671 rtx slot = XVECEXP (container, 0, i);
8672 rtx reg = XEXP (slot, 0);
8673 enum machine_mode mode = GET_MODE (reg);
8674 tree piece_type;
8675 tree addr_type;
8676 tree daddr_type;
8677 tree src_addr, src;
8678 int src_offset;
8679 tree dest_addr, dest;
8680 int cur_size = GET_MODE_SIZE (mode);
8682 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8683 prev_size = INTVAL (XEXP (slot, 1));
8684 if (prev_size + cur_size > size)
8686 cur_size = size - prev_size;
8687 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
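/* If no integer mode of exactly the remaining size exists, fall back to QImode; the size mismatch then routes the copy through the memcpy path below.  */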
8688 if (mode == BLKmode)
8689 mode = QImode;
8691 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8692 if (mode == GET_MODE (reg))
8693 addr_type = build_pointer_type (piece_type);
8694 else
8695 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8696 true);
8697 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8698 true);
8700 if (SSE_REGNO_P (REGNO (reg)))
8702 src_addr = sse_addr;
8703 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8705 else
8707 src_addr = int_addr;
8708 src_offset = REGNO (reg) * 8;
8710 src_addr = fold_convert (addr_type, src_addr);
8711 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8713 dest_addr = fold_convert (daddr_type, addr);
8714 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8715 if (cur_size == GET_MODE_SIZE (mode))
8717 src = build_va_arg_indirect_ref (src_addr);
8718 dest = build_va_arg_indirect_ref (dest_addr);
8720 gimplify_assign (dest, src, pre_p);
8722 else
8724 tree copy
8725 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8726 3, dest_addr, src_addr,
8727 size_int (cur_size));
8728 gimplify_and_add (copy, pre_p);
8730 prev_size += cur_size;
8734 if (needed_intregs)
8736 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8737 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8738 gimplify_assign (gpr, t, pre_p);
8741 if (needed_sseregs)
8743 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8744 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8745 gimplify_assign (fpr, t, pre_p);
8748 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8750 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8753 /* ... otherwise out of the overflow area. */
8755 /* When we align a parameter on the stack for the caller, a parameter
8756 whose alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT is in fact
8757 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8758 with the caller. */
8759 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8760 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8761 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8763 /* Care for on-stack alignment if needed. */
8764 if (arg_boundary <= 64 || size == 0)
8765 t = ovf;
8766 else
8768 HOST_WIDE_INT align = arg_boundary / 8;
8769 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8770 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8771 build_int_cst (TREE_TYPE (t), -align));
8774 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8775 gimplify_assign (addr, t, pre_p);
8777 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8778 gimplify_assign (unshare_expr (ovf), t, pre_p);
8780 if (container)
8781 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8783 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8784 addr = fold_convert (ptrtype, addr);
8786 if (indirect_p)
8787 addr = build_va_arg_indirect_ref (addr);
8788 return build_va_arg_indirect_ref (addr);
8791 /* Return true if OPNUM's MEM should be matched
8792 in movabs* patterns. */
8794 bool
8795 ix86_check_movabs (rtx insn, int opnum)
8797 rtx set, mem;
8799 set = PATTERN (insn);
8800 if (GET_CODE (set) == PARALLEL)
8801 set = XVECEXP (set, 0, 0);
8802 gcc_assert (GET_CODE (set) == SET);
8803 mem = XEXP (set, opnum);
8804 while (GET_CODE (mem) == SUBREG)
8805 mem = SUBREG_REG (mem);
8806 gcc_assert (MEM_P (mem));
8807 return volatile_ok || !MEM_VOLATILE_P (mem);
8810 /* Initialize the table of extra 80387 mathematical constants. */
8812 static void
8813 init_ext_80387_constants (void)
8815 static const char * cst[5] =
8817 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8818 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8819 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8820 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8821 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8823 int i;
8825 for (i = 0; i < 5; i++)
8827 real_from_string (&ext_80387_constants_table[i], cst[i]);
8828 /* Ensure each constant is rounded to XFmode precision. */
8829 real_convert (&ext_80387_constants_table[i],
8830 XFmode, &ext_80387_constants_table[i]);
8833 ext_80387_constants_init = 1;
8836 /* Return non-zero if the constant is something that
8837 can be loaded with a special instruction. */
8839 int
8840 standard_80387_constant_p (rtx x)
8842 enum machine_mode mode = GET_MODE (x);
8844 REAL_VALUE_TYPE r;
8846 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8847 return -1;
8849 if (x == CONST0_RTX (mode))
8850 return 1;
8851 if (x == CONST1_RTX (mode))
8852 return 2;
8854 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8856 /* For XFmode constants, try to find a special 80387 instruction when
8857 optimizing for size or on those CPUs that benefit from them. */
8858 if (mode == XFmode
8859 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8861 int i;
8863 if (! ext_80387_constants_init)
8864 init_ext_80387_constants ();
8866 for (i = 0; i < 5; i++)
8867 if (real_identical (&r, &ext_80387_constants_table[i]))
8868 return i + 3;
8871 /* A load of the constant -0.0 or -1.0 will be split into an
8872 fldz;fchs or fld1;fchs sequence. */
8873 if (real_isnegzero (&r))
8874 return 8;
8875 if (real_identical (&r, &dconstm1))
8876 return 9;
8878 return 0;
8881 /* Return the opcode of the special instruction to be used to load
8882 the constant X. */
8884 const char *
8885 standard_80387_constant_opcode (rtx x)
8887 switch (standard_80387_constant_p (x))
8889 case 1:
8890 return "fldz";
8891 case 2:
8892 return "fld1";
8893 case 3:
8894 return "fldlg2";
8895 case 4:
8896 return "fldln2";
8897 case 5:
8898 return "fldl2e";
8899 case 6:
8900 return "fldl2t";
8901 case 7:
8902 return "fldpi";
8903 case 8:
8904 case 9:
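/* Loads of -0.0 and -1.0 are split into fldz/fld1 followed by fchs; returning "#" tells the output machinery that this insn must be split before it can be emitted.  */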
8905 return "#";
8906 default:
8907 gcc_unreachable ();
8911 /* Return the CONST_DOUBLE representing the 80387 constant that is
8912 loaded by the specified special instruction. The argument IDX
8913 matches the return value from standard_80387_constant_p. */
8915 rtx
8916 standard_80387_constant_rtx (int idx)
8918 int i;
8920 if (! ext_80387_constants_init)
8921 init_ext_80387_constants ();
8923 switch (idx)
8925 case 3:
8926 case 4:
8927 case 5:
8928 case 6:
8929 case 7:
8930 i = idx - 3;
8931 break;
8933 default:
8934 gcc_unreachable ();
8937 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8938 XFmode);
8941 /* Return 1 if X is all 0s and 2 if X is all 1s
8942 in a supported SSE/AVX vector mode. */
8944 int
8945 standard_sse_constant_p (rtx x)
8947 enum machine_mode mode = GET_MODE (x);
8949 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8950 return 1;
8951 if (vector_all_ones_operand (x, mode))
8952 switch (mode)
8954 case V16QImode:
8955 case V8HImode:
8956 case V4SImode:
8957 case V2DImode:
8958 if (TARGET_SSE2)
8959 return 2;
8960 case V32QImode:
8961 case V16HImode:
8962 case V8SImode:
8963 case V4DImode:
8964 if (TARGET_AVX2)
8965 return 2;
8966 case V64QImode:
8967 case V32HImode:
8968 case V16SImode:
8969 case V8DImode:
8970 if (TARGET_AVX512F)
8971 return 2;
8972 default:
8973 break;
8976 return 0;
8979 /* Return the opcode of the special instruction to be used to load
8980 the constant X. */
8982 const char *
8983 standard_sse_constant_opcode (rtx insn, rtx x)
8985 switch (standard_sse_constant_p (x))
8987 case 1:
8988 switch (get_attr_mode (insn))
8990 case MODE_XI:
8991 case MODE_V16SF:
8992 return "vpxord\t%g0, %g0, %g0";
8993 case MODE_V8DF:
8994 return "vpxorq\t%g0, %g0, %g0";
8995 case MODE_TI:
8996 return "%vpxor\t%0, %d0";
8997 case MODE_V2DF:
8998 return "%vxorpd\t%0, %d0";
8999 case MODE_V4SF:
9000 return "%vxorps\t%0, %d0";
9002 case MODE_OI:
9003 return "vpxor\t%x0, %x0, %x0";
9004 case MODE_V4DF:
9005 return "vxorpd\t%x0, %x0, %x0";
9006 case MODE_V8SF:
9007 return "vxorps\t%x0, %x0, %x0";
9009 default:
9010 break;
9013 case 2:
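/* AVX-512 integer compares write to mask registers, so an all-ones vector in a 512-bit mode is materialized with vpternlogd instead.  */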
9014 if (get_attr_mode (insn) == MODE_XI
9015 || get_attr_mode (insn) == MODE_V8DF
9016 || get_attr_mode (insn) == MODE_V16SF)
9017 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9018 if (TARGET_AVX)
9019 return "vpcmpeqd\t%0, %0, %0";
9020 else
9021 return "pcmpeqd\t%0, %0";
9023 default:
9024 break;
9026 gcc_unreachable ();
9029 /* Return true if OP contains a symbol reference. */
9031 bool
9032 symbolic_reference_mentioned_p (rtx op)
9034 const char *fmt;
9035 int i;
9037 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9038 return true;
9040 fmt = GET_RTX_FORMAT (GET_CODE (op));
9041 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9043 if (fmt[i] == 'E')
9045 int j;
9047 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9048 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9049 return true;
9052 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9053 return true;
9056 return false;
9059 /* Return true if it is appropriate to emit `ret' instructions in the
9060 body of a function. Do this only if the epilogue is simple, needing a
9061 couple of insns. Prior to reloading, we can't tell how many registers
9062 must be saved, so return false then. Return false if there is no frame
9063 marker to de-allocate. */
9065 bool
9066 ix86_can_use_return_insn_p (void)
9068 struct ix86_frame frame;
9070 if (! reload_completed || frame_pointer_needed)
9071 return 0;
9073 /* Don't allow more than 32k pop, since that's all we can do
9074 with one instruction. */
9075 if (crtl->args.pops_args && crtl->args.size >= 32768)
9076 return 0;
9078 ix86_compute_frame_layout (&frame);
9079 return (frame.stack_pointer_offset == UNITS_PER_WORD
9080 && (frame.nregs + frame.nsseregs) == 0);
9083 /* Value should be nonzero if functions must have frame pointers.
9084 Zero means the frame pointer need not be set up (and parms may
9085 be accessed via the stack pointer) in functions that seem suitable. */
9087 static bool
9088 ix86_frame_pointer_required (void)
9090 /* If we accessed previous frames, then the generated code expects
9091 to be able to access the saved ebp value in our frame. */
9092 if (cfun->machine->accesses_prev_frame)
9093 return true;
9095 /* Several x86 OSes need a frame pointer for other reasons,
9096 usually pertaining to setjmp. */
9097 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9098 return true;
9100 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9101 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9102 return true;
9104 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9105 stack allocation is 4GB. */
9106 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9107 return true;
9109 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9110 turns off the frame pointer by default. Turn it back on now if
9111 we've not got a leaf function. */
9112 if (TARGET_OMIT_LEAF_FRAME_POINTER
9113 && (!crtl->is_leaf
9114 || ix86_current_function_calls_tls_descriptor))
9115 return true;
9117 if (crtl->profile && !flag_fentry)
9118 return true;
9120 return false;
9123 /* Record that the current function accesses previous call frames. */
9125 void
9126 ix86_setup_frame_addresses (void)
9128 cfun->machine->accesses_prev_frame = 1;
9131 #ifndef USE_HIDDEN_LINKONCE
9132 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9133 # define USE_HIDDEN_LINKONCE 1
9134 # else
9135 # define USE_HIDDEN_LINKONCE 0
9136 # endif
9137 #endif
9139 static int pic_labels_used;
9141 /* Fills in the label name that should be used for a pc thunk for
9142 the given register. */
9144 static void
9145 get_pc_thunk_name (char name[32], unsigned int regno)
9147 gcc_assert (!TARGET_64BIT);
9149 if (USE_HIDDEN_LINKONCE)
9150 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9151 else
9152 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9156 /* This function generates the pc thunks used for -fpic: each thunk loads
9157 the return address of the caller into its register and then returns. */
9159 static void
9160 ix86_code_end (void)
9162 rtx xops[2];
9163 int regno;
9165 for (regno = AX_REG; regno <= SP_REG; regno++)
9167 char name[32];
9168 tree decl;
9170 if (!(pic_labels_used & (1 << regno)))
9171 continue;
9173 get_pc_thunk_name (name, regno);
9175 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9176 get_identifier (name),
9177 build_function_type_list (void_type_node, NULL_TREE));
9178 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9179 NULL_TREE, void_type_node);
9180 TREE_PUBLIC (decl) = 1;
9181 TREE_STATIC (decl) = 1;
9182 DECL_IGNORED_P (decl) = 1;
9184 #if TARGET_MACHO
9185 if (TARGET_MACHO)
9187 switch_to_section (darwin_sections[text_coal_section]);
9188 fputs ("\t.weak_definition\t", asm_out_file);
9189 assemble_name (asm_out_file, name);
9190 fputs ("\n\t.private_extern\t", asm_out_file);
9191 assemble_name (asm_out_file, name);
9192 putc ('\n', asm_out_file);
9193 ASM_OUTPUT_LABEL (asm_out_file, name);
9194 DECL_WEAK (decl) = 1;
9196 else
9197 #endif
9198 if (USE_HIDDEN_LINKONCE)
9200 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9202 targetm.asm_out.unique_section (decl, 0);
9203 switch_to_section (get_named_section (decl, NULL, 0));
9205 targetm.asm_out.globalize_label (asm_out_file, name);
9206 fputs ("\t.hidden\t", asm_out_file);
9207 assemble_name (asm_out_file, name);
9208 putc ('\n', asm_out_file);
9209 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9211 else
9213 switch_to_section (text_section);
9214 ASM_OUTPUT_LABEL (asm_out_file, name);
9217 DECL_INITIAL (decl) = make_node (BLOCK);
9218 current_function_decl = decl;
9219 init_function_start (decl);
9220 first_function_block_is_cold = false;
9221 /* Make sure unwind info is emitted for the thunk if needed. */
9222 final_start_function (emit_barrier (), asm_out_file, 1);
9224 /* Pad stack IP move with 4 instructions (two NOPs count
9225 as one instruction). */
9226 if (TARGET_PAD_SHORT_FUNCTION)
9228 int i = 8;
9230 while (i--)
9231 fputs ("\tnop\n", asm_out_file);
9234 xops[0] = gen_rtx_REG (Pmode, regno);
9235 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9236 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9237 fputs ("\tret\n", asm_out_file);
9238 final_end_function ();
9239 init_insn_lengths ();
9240 free_after_compilation (cfun);
9241 set_cfun (NULL);
9242 current_function_decl = NULL;
9245 if (flag_split_stack)
9246 file_end_indicate_split_stack ();
9249 /* Emit code for the SET_GOT patterns. */
9251 const char *
9252 output_set_got (rtx dest, rtx label)
9254 rtx xops[3];
9256 xops[0] = dest;
9258 if (TARGET_VXWORKS_RTP && flag_pic)
9260 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9261 xops[2] = gen_rtx_MEM (Pmode,
9262 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9263 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9265 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9266 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9267 an unadorned address. */
9268 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9269 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9270 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9271 return "";
9274 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9276 if (!flag_pic)
9278 if (TARGET_MACHO)
9279 /* We don't need a pic base, we're not producing pic. */
9280 gcc_unreachable ();
9282 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9283 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9284 targetm.asm_out.internal_label (asm_out_file, "L",
9285 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9287 else
9289 char name[32];
9290 get_pc_thunk_name (name, REGNO (dest));
9291 pic_labels_used |= 1 << REGNO (dest);
9293 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9294 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9295 output_asm_insn ("call\t%X2", xops);
9297 #if TARGET_MACHO
9298 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9299 This is what will be referenced by the Mach-O PIC subsystem. */
9300 if (machopic_should_output_picbase_label () || !label)
9301 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9303 /* When we are restoring the pic base at the site of a nonlocal label,
9304 and we decided to emit the pic base above, we will still output a
9305 local label used for calculating the correction offset (even though
9306 the offset will be 0 in that case). */
9307 if (label)
9308 targetm.asm_out.internal_label (asm_out_file, "L",
9309 CODE_LABEL_NUMBER (label));
9310 #endif
9313 if (!TARGET_MACHO)
9314 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9316 return "";
9319 /* Generate a "push" pattern for input ARG. */
9321 static rtx
9322 gen_push (rtx arg)
9324 struct machine_function *m = cfun->machine;
9326 if (m->fs.cfa_reg == stack_pointer_rtx)
9327 m->fs.cfa_offset += UNITS_PER_WORD;
9328 m->fs.sp_offset += UNITS_PER_WORD;
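/* Pushes always operate on a full word, so promote a narrower hard register to its word_mode counterpart.  */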
9330 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9331 arg = gen_rtx_REG (word_mode, REGNO (arg));
9333 return gen_rtx_SET (VOIDmode,
9334 gen_rtx_MEM (word_mode,
9335 gen_rtx_PRE_DEC (Pmode,
9336 stack_pointer_rtx)),
9337 arg);
9340 /* Generate a "pop" pattern for input ARG. */
9342 static rtx
9343 gen_pop (rtx arg)
9345 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9346 arg = gen_rtx_REG (word_mode, REGNO (arg));
9348 return gen_rtx_SET (VOIDmode,
9349 arg,
9350 gen_rtx_MEM (word_mode,
9351 gen_rtx_POST_INC (Pmode,
9352 stack_pointer_rtx)));
9355 /* Return >= 0 if there is an unused call-clobbered register available
9356 for the entire function. */
9358 static unsigned int
9359 ix86_select_alt_pic_regnum (void)
9361 if (crtl->is_leaf
9362 && !crtl->profile
9363 && !ix86_current_function_calls_tls_descriptor)
9365 int i, drap;
9366 /* Can't use the same register for both PIC and DRAP. */
9367 if (crtl->drap_reg)
9368 drap = REGNO (crtl->drap_reg);
9369 else
9370 drap = -1;
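/* Hard registers 0-2 are %eax, %edx and %ecx, all call-clobbered.  */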
9371 for (i = 2; i >= 0; --i)
9372 if (i != drap && !df_regs_ever_live_p (i))
9373 return i;
9376 return INVALID_REGNUM;
9379 /* Return TRUE if we need to save REGNO. */
9381 static bool
9382 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9384 if (pic_offset_table_rtx
9385 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9386 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9387 || crtl->profile
9388 || crtl->calls_eh_return
9389 || crtl->uses_const_pool
9390 || cfun->has_nonlocal_label))
9391 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9393 if (crtl->calls_eh_return && maybe_eh_return)
9395 unsigned i;
9396 for (i = 0; ; i++)
9398 unsigned test = EH_RETURN_DATA_REGNO (i);
9399 if (test == INVALID_REGNUM)
9400 break;
9401 if (test == regno)
9402 return true;
9406 if (crtl->drap_reg
9407 && regno == REGNO (crtl->drap_reg)
9408 && !cfun->machine->no_drap_save_restore)
9409 return true;
9411 return (df_regs_ever_live_p (regno)
9412 && !call_used_regs[regno]
9413 && !fixed_regs[regno]
9414 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9417 /* Return the number of saved general purpose registers. */
9419 static int
9420 ix86_nsaved_regs (void)
9422 int nregs = 0;
9423 int regno;
9425 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9426 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9427 nregs ++;
9428 return nregs;
9431 /* Return the number of saved SSE registers. */
9433 static int
9434 ix86_nsaved_sseregs (void)
9436 int nregs = 0;
9437 int regno;
9439 if (!TARGET_64BIT_MS_ABI)
9440 return 0;
9441 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9442 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9443 nregs ++;
9444 return nregs;
9447 /* Given FROM and TO register numbers, say whether this elimination is
9448 allowed. If stack alignment is needed, we can only replace argument
9449 pointer with hard frame pointer, or replace frame pointer with stack
9450 pointer. Otherwise, frame pointer elimination is automatically
9451 handled and all other eliminations are valid. */
9453 static bool
9454 ix86_can_eliminate (const int from, const int to)
9456 if (stack_realign_fp)
9457 return ((from == ARG_POINTER_REGNUM
9458 && to == HARD_FRAME_POINTER_REGNUM)
9459 || (from == FRAME_POINTER_REGNUM
9460 && to == STACK_POINTER_REGNUM));
9461 else
9462 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9465 /* Return the offset between two registers, one to be eliminated, and the other
9466 its replacement, at the start of a routine. */
9468 HOST_WIDE_INT
9469 ix86_initial_elimination_offset (int from, int to)
9471 struct ix86_frame frame;
9472 ix86_compute_frame_layout (&frame);
9474 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9475 return frame.hard_frame_pointer_offset;
9476 else if (from == FRAME_POINTER_REGNUM
9477 && to == HARD_FRAME_POINTER_REGNUM)
9478 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9479 else
9481 gcc_assert (to == STACK_POINTER_REGNUM);
9483 if (from == ARG_POINTER_REGNUM)
9484 return frame.stack_pointer_offset;
9486 gcc_assert (from == FRAME_POINTER_REGNUM);
9487 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9491 /* In a dynamically-aligned function, we can't know the offset from
9492 stack pointer to frame pointer, so we must ensure that setjmp
9493 eliminates fp against the hard fp (%ebp) rather than trying to
9494 index from %esp up to the top of the frame across a gap that is
9495 of unknown (at compile-time) size. */
9496 static rtx
9497 ix86_builtin_setjmp_frame_value (void)
9499 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9502 /* When using -fsplit-stack, the allocation routines set a field in
9503 the TCB to the bottom of the stack plus this much space, measured
9504 in bytes. */
9506 #define SPLIT_STACK_AVAILABLE 256
9508 /* Fill structure ix86_frame about frame of currently computed function. */
9510 static void
9511 ix86_compute_frame_layout (struct ix86_frame *frame)
9513 unsigned HOST_WIDE_INT stack_alignment_needed;
9514 HOST_WIDE_INT offset;
9515 unsigned HOST_WIDE_INT preferred_alignment;
9516 HOST_WIDE_INT size = get_frame_size ();
9517 HOST_WIDE_INT to_allocate;
9519 frame->nregs = ix86_nsaved_regs ();
9520 frame->nsseregs = ix86_nsaved_sseregs ();
9522 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9523 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9525 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9526 except in function prologues and in leaf functions. */
9527 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9528 && (!crtl->is_leaf || cfun->calls_alloca != 0
9529 || ix86_current_function_calls_tls_descriptor))
9531 preferred_alignment = 16;
9532 stack_alignment_needed = 16;
9533 crtl->preferred_stack_boundary = 128;
9534 crtl->stack_alignment_needed = 128;
9537 gcc_assert (!size || stack_alignment_needed);
9538 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9539 gcc_assert (preferred_alignment <= stack_alignment_needed);
9541 /* For SEH we have to limit the amount of code movement into the prologue.
9542 At present we do this via a BLOCKAGE, at which point there's very little
9543 scheduling that can be done, which means that there's very little point
9544 in doing anything except PUSHs. */
9545 if (TARGET_SEH)
9546 cfun->machine->use_fast_prologue_epilogue = false;
9548 /* During reload iteration the number of registers saved can change.
9549 Recompute the value as needed. Do not recompute when the number of
9550 registers didn't change, as reload makes multiple calls to this function
9551 and does not expect the decision to change within a single iteration. */
9552 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9553 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9555 int count = frame->nregs;
9556 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9558 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9560 /* The fast prologue uses move instead of push to save registers. This
9561 is significantly longer, but also executes faster, as modern hardware
9562 can execute the moves in parallel but can't do that for push/pop.
9564 Be careful about choosing which prologue to emit: when the function takes
9565 many instructions to execute, we may use the slow version, as we do when
9566 the function is known to be outside a hot spot (known with feedback
9567 only). Weight the size of the function by the number of registers to
9568 save, as it is cheap to use one or two push instructions but very
9569 slow to use many of them. */
9570 if (count)
9571 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9572 if (node->frequency < NODE_FREQUENCY_NORMAL
9573 || (flag_branch_probabilities
9574 && node->frequency < NODE_FREQUENCY_HOT))
9575 cfun->machine->use_fast_prologue_epilogue = false;
9576 else
9577 cfun->machine->use_fast_prologue_epilogue
9578 = !expensive_function_p (count);
9581 frame->save_regs_using_mov
9582 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9583 /* If static stack checking is enabled and done with probes,
9584 the registers need to be saved before allocating the frame. */
9585 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9587 /* Skip return address. */
9588 offset = UNITS_PER_WORD;
9590 /* Skip pushed static chain. */
9591 if (ix86_static_chain_on_stack)
9592 offset += UNITS_PER_WORD;
9594 /* Skip saved base pointer. */
9595 if (frame_pointer_needed)
9596 offset += UNITS_PER_WORD;
9597 frame->hfp_save_offset = offset;
9599 /* The traditional frame pointer location is at the top of the frame. */
9600 frame->hard_frame_pointer_offset = offset;
9602 /* Register save area */
9603 offset += frame->nregs * UNITS_PER_WORD;
9604 frame->reg_save_offset = offset;
9606 /* On SEH target, registers are pushed just before the frame pointer
9607 location. */
9608 if (TARGET_SEH)
9609 frame->hard_frame_pointer_offset = offset;
9611 /* Align and set SSE register save area. */
9612 if (frame->nsseregs)
9614 /* The only ABI that has saved SSE registers (Win64) also has a
9615 16-byte aligned default stack, and thus we don't need to be
9616 within the re-aligned local stack frame to save them. */
9617 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
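/* Round the running offset up to a 16-byte boundary before laying out the SSE save area.  */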
9618 offset = (offset + 16 - 1) & -16;
9619 offset += frame->nsseregs * 16;
9621 frame->sse_reg_save_offset = offset;
9623 /* The re-aligned stack starts here. Values before this point are not
9624 directly comparable with values below this point. In order to make
9625 sure that no value happens to be the same before and after, force
9626 the alignment computation below to add a non-zero value. */
9627 if (stack_realign_fp)
9628 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9630 /* Va-arg area */
9631 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9632 offset += frame->va_arg_size;
9634 /* Align start of frame for local function. */
9635 if (stack_realign_fp
9636 || offset != frame->sse_reg_save_offset
9637 || size != 0
9638 || !crtl->is_leaf
9639 || cfun->calls_alloca
9640 || ix86_current_function_calls_tls_descriptor)
9641 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9643 /* Frame pointer points here. */
9644 frame->frame_pointer_offset = offset;
9646 offset += size;
9648 /* Add outgoing arguments area. Can be skipped if we eliminated
9649 all the function calls as dead code.
9650 Skipping is however impossible when function calls alloca. Alloca
9651 expander assumes that last crtl->outgoing_args_size
9652 of stack frame are unused. */
9653 if (ACCUMULATE_OUTGOING_ARGS
9654 && (!crtl->is_leaf || cfun->calls_alloca
9655 || ix86_current_function_calls_tls_descriptor))
9657 offset += crtl->outgoing_args_size;
9658 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9660 else
9661 frame->outgoing_arguments_size = 0;
9663 /* Align stack boundary. Only needed if we're calling another function
9664 or using alloca. */
9665 if (!crtl->is_leaf || cfun->calls_alloca
9666 || ix86_current_function_calls_tls_descriptor)
9667 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9669 /* We've reached end of stack frame. */
9670 frame->stack_pointer_offset = offset;
9672 /* Size prologue needs to allocate. */
9673 to_allocate = offset - frame->sse_reg_save_offset;
9675 if ((!to_allocate && frame->nregs <= 1)
9676 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9677 frame->save_regs_using_mov = false;
9679 if (ix86_using_red_zone ()
9680 && crtl->sp_is_unchanging
9681 && crtl->is_leaf
9682 && !ix86_current_function_calls_tls_descriptor)
9684 frame->red_zone_size = to_allocate;
9685 if (frame->save_regs_using_mov)
9686 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9687 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9688 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9690 else
9691 frame->red_zone_size = 0;
9692 frame->stack_pointer_offset -= frame->red_zone_size;
9694 /* The SEH frame pointer location is near the bottom of the frame.
9695 This is enforced by the fact that the difference between the
9696 stack pointer and the frame pointer is limited to 240 bytes in
9697 the unwind data structure. */
9698 if (TARGET_SEH)
9700 HOST_WIDE_INT diff;
9702 /* If we can leave the frame pointer where it is, do so. Also, returns
9703 the establisher frame for __builtin_frame_address (0). */
9704 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9705 if (diff <= SEH_MAX_FRAME_SIZE
9706 && (diff > 240 || (diff & 15) != 0)
9707 && !crtl->accesses_prior_frames)
9709 /* Ideally we'd determine what portion of the local stack frame
9710 (within the constraint of the lowest 240) is most heavily used.
9711 But without that complication, simply bias the frame pointer
9712 by 128 bytes so as to maximize the amount of the local stack
9713 frame that is addressable with 8-bit offsets. */
9714 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9719 /* This is semi-inlined memory_address_length, but simplified
9720 since we know that we're always dealing with reg+offset, and
9721 to avoid having to create and discard all that rtl. */
9723 static inline int
9724 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9726 int len = 4;
9728 if (offset == 0)
9730 /* EBP and R13 cannot be encoded without an offset. */
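/* The comparison yields 1 when a one-byte displacement must still be emitted for these registers, and 0 otherwise.  */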
9731 len = (regno == BP_REG || regno == R13_REG);
9733 else if (IN_RANGE (offset, -128, 127))
9734 len = 1;
9736 /* ESP and R12 must be encoded with a SIB byte. */
9737 if (regno == SP_REG || regno == R12_REG)
9738 len++;
9740 return len;
9743 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9744 The valid base registers are taken from CFUN->MACHINE->FS. */
9746 static rtx
9747 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9749 const struct machine_function *m = cfun->machine;
9750 rtx base_reg = NULL;
9751 HOST_WIDE_INT base_offset = 0;
9753 if (m->use_fast_prologue_epilogue)
9755 /* Choose the base register most likely to allow the most scheduling
9756 opportunities. Generally FP is valid throughout the function,
9757 while DRAP must be reloaded within the epilogue. But choose either
9758 over the SP due to increased encoding size. */
9760 if (m->fs.fp_valid)
9762 base_reg = hard_frame_pointer_rtx;
9763 base_offset = m->fs.fp_offset - cfa_offset;
9765 else if (m->fs.drap_valid)
9767 base_reg = crtl->drap_reg;
9768 base_offset = 0 - cfa_offset;
9770 else if (m->fs.sp_valid)
9772 base_reg = stack_pointer_rtx;
9773 base_offset = m->fs.sp_offset - cfa_offset;
9776 else
9778 HOST_WIDE_INT toffset;
9779 int len = 16, tlen;
9781 /* Choose the base register with the smallest address encoding.
9782 With a tie, choose FP > DRAP > SP. */
9783 if (m->fs.sp_valid)
9785 base_reg = stack_pointer_rtx;
9786 base_offset = m->fs.sp_offset - cfa_offset;
9787 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9789 if (m->fs.drap_valid)
9791 toffset = 0 - cfa_offset;
9792 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9793 if (tlen <= len)
9795 base_reg = crtl->drap_reg;
9796 base_offset = toffset;
9797 len = tlen;
9800 if (m->fs.fp_valid)
9802 toffset = m->fs.fp_offset - cfa_offset;
9803 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9804 if (tlen <= len)
9806 base_reg = hard_frame_pointer_rtx;
9807 base_offset = toffset;
9808 len = tlen;
9812 gcc_assert (base_reg != NULL);
9814 return plus_constant (Pmode, base_reg, base_offset);
9817 /* Emit code to save registers in the prologue. */
9819 static void
9820 ix86_emit_save_regs (void)
9822 unsigned int regno;
9823 rtx insn;
9825 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9826 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9828 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9829 RTX_FRAME_RELATED_P (insn) = 1;
9833 /* Emit a single register save at CFA - CFA_OFFSET. */
9835 static void
9836 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9837 HOST_WIDE_INT cfa_offset)
9839 struct machine_function *m = cfun->machine;
9840 rtx reg = gen_rtx_REG (mode, regno);
9841 rtx mem, addr, base, insn;
9843 addr = choose_baseaddr (cfa_offset);
9844 mem = gen_frame_mem (mode, addr);
9846 /* For SSE saves, we need to indicate the 128-bit alignment. */
9847 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9849 insn = emit_move_insn (mem, reg);
9850 RTX_FRAME_RELATED_P (insn) = 1;
9852 base = addr;
9853 if (GET_CODE (base) == PLUS)
9854 base = XEXP (base, 0);
9855 gcc_checking_assert (REG_P (base));
9857 /* When saving registers into a re-aligned local stack frame, avoid
9858 any tricky guessing by dwarf2out. */
9859 if (m->fs.realigned)
9861 gcc_checking_assert (stack_realign_drap);
9863 if (regno == REGNO (crtl->drap_reg))
9865 /* A bit of a hack. We force the DRAP register to be saved in
9866 the re-aligned stack frame, which provides us with a copy
9867 of the CFA that will last past the prologue. Install it. */
9868 gcc_checking_assert (cfun->machine->fs.fp_valid);
9869 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9870 cfun->machine->fs.fp_offset - cfa_offset);
9871 mem = gen_rtx_MEM (mode, addr);
9872 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9874 else
9876 /* The frame pointer is a stable reference within the
9877 aligned frame. Use it. */
9878 gcc_checking_assert (cfun->machine->fs.fp_valid);
9879 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9880 cfun->machine->fs.fp_offset - cfa_offset);
9881 mem = gen_rtx_MEM (mode, addr);
9882 add_reg_note (insn, REG_CFA_EXPRESSION,
9883 gen_rtx_SET (VOIDmode, mem, reg));
9887 /* The memory may not be relative to the current CFA register,
9888 which means that we may need to generate a new pattern for
9889 use by the unwind info. */
9890 else if (base != m->fs.cfa_reg)
9892 addr = plus_constant (Pmode, m->fs.cfa_reg,
9893 m->fs.cfa_offset - cfa_offset);
9894 mem = gen_rtx_MEM (mode, addr);
9895 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9899 /* Emit code to save registers using MOV insns.
9900 First register is stored at CFA - CFA_OFFSET. */
9901 static void
9902 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9904 unsigned int regno;
9906 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9907 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9909 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9910 cfa_offset -= UNITS_PER_WORD;
9914 /* Emit code to save SSE registers using MOV insns.
9915 First register is stored at CFA - CFA_OFFSET. */
9916 static void
9917 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9919 unsigned int regno;
9921 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9922 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9924 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9925 cfa_offset -= 16;
9929 static GTY(()) rtx queued_cfa_restores;
9931 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
9932 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9933 Don't add the note if the previously saved value will be left untouched
9934 within stack red-zone till return, as unwinders can find the same value
9935 in the register and on the stack. */
9937 static void
9938 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9940 if (!crtl->shrink_wrapped
9941 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9942 return;
9944 if (insn)
9946 add_reg_note (insn, REG_CFA_RESTORE, reg);
9947 RTX_FRAME_RELATED_P (insn) = 1;
9949 else
9950 queued_cfa_restores
9951 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9954 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9956 static void
9957 ix86_add_queued_cfa_restore_notes (rtx insn)
9959 rtx last;
9960 if (!queued_cfa_restores)
9961 return;
9962 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9964 XEXP (last, 1) = REG_NOTES (insn);
9965 REG_NOTES (insn) = queued_cfa_restores;
9966 queued_cfa_restores = NULL_RTX;
9967 RTX_FRAME_RELATED_P (insn) = 1;
9970 /* Expand prologue or epilogue stack adjustment.
9971 The pattern exists to put a dependency on all ebp-based memory accesses.
9972 STYLE should be negative if instructions should be marked as frame related,
9973 zero if the %r11 register is live and cannot be freely used, and positive
9974 otherwise. */
9976 static void
9977 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9978 int style, bool set_cfa)
9980 struct machine_function *m = cfun->machine;
9981 rtx insn;
9982 bool add_frame_related_expr = false;
9984 if (Pmode == SImode)
9985 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9986 else if (x86_64_immediate_operand (offset, DImode))
9987 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9988 else
9990 rtx tmp;
9991 /* r11 is used by indirect sibcall return as well, set before the
9992 epilogue and used after the epilogue. */
9993 if (style)
9994 tmp = gen_rtx_REG (DImode, R11_REG);
9995 else
9997 gcc_assert (src != hard_frame_pointer_rtx
9998 && dest != hard_frame_pointer_rtx);
9999 tmp = hard_frame_pointer_rtx;
10001 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10002 if (style < 0)
10003 add_frame_related_expr = true;
10005 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10008 insn = emit_insn (insn);
10009 if (style >= 0)
10010 ix86_add_queued_cfa_restore_notes (insn);
10012 if (set_cfa)
10014 rtx r;
10016 gcc_assert (m->fs.cfa_reg == src);
10017 m->fs.cfa_offset += INTVAL (offset);
10018 m->fs.cfa_reg = dest;
10020 r = gen_rtx_PLUS (Pmode, src, offset);
10021 r = gen_rtx_SET (VOIDmode, dest, r);
10022 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10023 RTX_FRAME_RELATED_P (insn) = 1;
10025 else if (style < 0)
10027 RTX_FRAME_RELATED_P (insn) = 1;
10028 if (add_frame_related_expr)
10030 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10031 r = gen_rtx_SET (VOIDmode, dest, r);
10032 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10036 if (dest == stack_pointer_rtx)
10038 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10039 bool valid = m->fs.sp_valid;
10041 if (src == hard_frame_pointer_rtx)
10043 valid = m->fs.fp_valid;
10044 ooffset = m->fs.fp_offset;
10046 else if (src == crtl->drap_reg)
10048 valid = m->fs.drap_valid;
10049 ooffset = 0;
10051 else
10053 /* Else there are two possibilities: SP itself, which we set
10054 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10055 taken care of by hand along the eh_return path. */
10056 gcc_checking_assert (src == stack_pointer_rtx
10057 || offset == const0_rtx);
10060 m->fs.sp_offset = ooffset - INTVAL (offset);
10061 m->fs.sp_valid = valid;
10065 /* Find an available register to be used as the dynamic realign argument
10066 pointer register. Such a register will be written in the prologue and
10067 used at the beginning of the body, so it must not be
10068 1. a parameter passing register.
10069 2. the GOT pointer.
10070 We reuse the static-chain register if it is available. Otherwise, we
10071 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10072 shorter encoding.
10074 Return: the regno of the chosen register. */
10076 static unsigned int
10077 find_drap_reg (void)
10079 tree decl = cfun->decl;
10081 if (TARGET_64BIT)
10083 /* Use R13 for a nested function or a function needing a static chain.
10084 Since a function with a tail call may use any caller-saved
10085 register in the epilogue, DRAP must not use a caller-saved
10086 register in such a case. */
10087 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10088 return R13_REG;
10090 return R10_REG;
10092 else
10094 /* Use DI for a nested function or a function needing a static chain.
10095 Since a function with a tail call may use any caller-saved
10096 register in the epilogue, DRAP must not use a caller-saved
10097 register in such a case. */
10098 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10099 return DI_REG;
10101 /* Reuse static chain register if it isn't used for parameter
10102 passing. */
10103 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10105 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10106 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10107 return CX_REG;
10109 return DI_REG;
10113 /* Return minimum incoming stack alignment. */
10115 static unsigned int
10116 ix86_minimum_incoming_stack_boundary (bool sibcall)
10118 unsigned int incoming_stack_boundary;
10120 /* Prefer the one specified at command line. */
10121 if (ix86_user_incoming_stack_boundary)
10122 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10123 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10124 when -mstackrealign is used, this is not a sibcall check, and the
10125 estimated stack alignment is 128 bits. */
10126 else if (!sibcall
10127 && !TARGET_64BIT
10128 && ix86_force_align_arg_pointer
10129 && crtl->stack_alignment_estimated == 128)
10130 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10131 else
10132 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10134 /* Incoming stack alignment can be changed on individual functions
10135 via force_align_arg_pointer attribute. We use the smallest
10136 incoming stack boundary. */
10137 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10138 && lookup_attribute (ix86_force_align_arg_pointer_string,
10139 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10140 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10142 /* The incoming stack frame has to be aligned at least at
10143 parm_stack_boundary. */
10144 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10145 incoming_stack_boundary = crtl->parm_stack_boundary;
10147 /* The stack at the entry of main is aligned by the runtime. We use the
10148 smallest incoming stack boundary. */
10149 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10150 && DECL_NAME (current_function_decl)
10151 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10152 && DECL_FILE_SCOPE_P (current_function_decl))
10153 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10155 return incoming_stack_boundary;
10158 /* Update incoming stack boundary and estimated stack alignment. */
10160 static void
10161 ix86_update_stack_boundary (void)
10163 ix86_incoming_stack_boundary
10164 = ix86_minimum_incoming_stack_boundary (false);
10166 /* x86_64 varargs need 16-byte stack alignment for the register save
10167 area. */
10168 if (TARGET_64BIT
10169 && cfun->stdarg
10170 && crtl->stack_alignment_estimated < 128)
10171 crtl->stack_alignment_estimated = 128;
10174 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10175 needed or an rtx for DRAP otherwise. */
10177 static rtx
10178 ix86_get_drap_rtx (void)
10180 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10181 crtl->need_drap = true;
10183 if (stack_realign_drap)
10185 /* Assign DRAP to vDRAP and return vDRAP. */
10186 unsigned int regno = find_drap_reg ();
10187 rtx drap_vreg;
10188 rtx arg_ptr;
10189 rtx seq, insn;
10191 arg_ptr = gen_rtx_REG (Pmode, regno);
10192 crtl->drap_reg = arg_ptr;
10194 start_sequence ();
10195 drap_vreg = copy_to_reg (arg_ptr);
10196 seq = get_insns ();
10197 end_sequence ();
10199 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10200 if (!optimize)
10202 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10203 RTX_FRAME_RELATED_P (insn) = 1;
10205 return drap_vreg;
10207 else
10208 return NULL;
10211 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10213 static rtx
10214 ix86_internal_arg_pointer (void)
10216 return virtual_incoming_args_rtx;
10219 struct scratch_reg {
10220 rtx reg;
10221 bool saved;
10224 /* Return a short-lived scratch register for use on function entry.
10225 In 32-bit mode, it is valid only after the registers are saved
10226 in the prologue. This register must be released by means of
10227 release_scratch_register_on_entry once it is dead. */
10229 static void
10230 get_scratch_register_on_entry (struct scratch_reg *sr)
10232 int regno;
10234 sr->saved = false;
10236 if (TARGET_64BIT)
10238 /* We always use R11 in 64-bit mode. */
10239 regno = R11_REG;
10241 else
10243 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10244 bool fastcall_p
10245 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10246 bool thiscall_p
10247 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10248 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10249 int regparm = ix86_function_regparm (fntype, decl);
10250 int drap_regno
10251 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10253 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10254 for the static chain register. */
10255 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10256 && drap_regno != AX_REG)
10257 regno = AX_REG;
10258 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10259 for the static chain register. */
10260 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10261 regno = AX_REG;
10262 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10263 regno = DX_REG;
10264 /* ecx is the static chain register. */
10265 else if (regparm < 3 && !fastcall_p && !thiscall_p
10266 && !static_chain_p
10267 && drap_regno != CX_REG)
10268 regno = CX_REG;
10269 else if (ix86_save_reg (BX_REG, true))
10270 regno = BX_REG;
10271 /* esi is the static chain register. */
10272 else if (!(regparm == 3 && static_chain_p)
10273 && ix86_save_reg (SI_REG, true))
10274 regno = SI_REG;
10275 else if (ix86_save_reg (DI_REG, true))
10276 regno = DI_REG;
10277 else
10279 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10280 sr->saved = true;
10284 sr->reg = gen_rtx_REG (Pmode, regno);
10285 if (sr->saved)
10287 rtx insn = emit_insn (gen_push (sr->reg));
10288 RTX_FRAME_RELATED_P (insn) = 1;
10292 /* Release a scratch register obtained from the preceding function. */
10294 static void
10295 release_scratch_register_on_entry (struct scratch_reg *sr)
10297 if (sr->saved)
10299 struct machine_function *m = cfun->machine;
10300 rtx x, insn = emit_insn (gen_pop (sr->reg));
10302 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10303 RTX_FRAME_RELATED_P (insn) = 1;
10304 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10305 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10306 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10307 m->fs.sp_offset -= UNITS_PER_WORD;
10311 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10313 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10315 static void
10316 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10318 /* We skip the probe for the first interval + a small dope of 4 words and
10319 probe that many bytes past the specified size to maintain a protection
10320 area at the bottom of the stack. */
10321 const int dope = 4 * UNITS_PER_WORD;
10322 rtx size_rtx = GEN_INT (size), last;
10324 /* See if we have a constant small number of probes to generate. If so,
10325 that's the easy case. The run-time loop is made up of 11 insns in the
10326 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10327 for n # of intervals. */
10328 if (size <= 5 * PROBE_INTERVAL)
10330 HOST_WIDE_INT i, adjust;
10331 bool first_probe = true;
10333 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10334 values of N from 1 until it exceeds SIZE. If only one probe is
10335 needed, this will not generate any code. Then adjust and probe
10336 to PROBE_INTERVAL + SIZE. */
10337 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10339 if (first_probe)
10341 adjust = 2 * PROBE_INTERVAL + dope;
10342 first_probe = false;
10344 else
10345 adjust = PROBE_INTERVAL;
10347 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10348 plus_constant (Pmode, stack_pointer_rtx,
10349 -adjust)));
10350 emit_stack_probe (stack_pointer_rtx);
10353 if (first_probe)
10354 adjust = size + PROBE_INTERVAL + dope;
10355 else
10356 adjust = size + PROBE_INTERVAL - i;
10358 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10359 plus_constant (Pmode, stack_pointer_rtx,
10360 -adjust)));
10361 emit_stack_probe (stack_pointer_rtx);
10363 /* Adjust back to account for the additional first interval. */
10364 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10365 plus_constant (Pmode, stack_pointer_rtx,
10366 PROBE_INTERVAL + dope)));
10369 /* Otherwise, do the same as above, but in a loop. Note that we must be
10370 extra careful with variables wrapping around because we might be at
10371 the very top (or the very bottom) of the address space and we have
10372 to be able to handle this case properly; in particular, we use an
10373 equality test for the loop condition. */
10374 else
10376 HOST_WIDE_INT rounded_size;
10377 struct scratch_reg sr;
10379 get_scratch_register_on_entry (&sr);
10382 /* Step 1: round SIZE to the previous multiple of the interval. */
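/* PROBE_INTERVAL is a power of two, so masking with its negation rounds SIZE down to a multiple of the interval.  */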
10384 rounded_size = size & -PROBE_INTERVAL;
10387 /* Step 2: compute initial and final value of the loop counter. */
10389 /* SP = SP_0 + PROBE_INTERVAL. */
10390 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10391 plus_constant (Pmode, stack_pointer_rtx,
10392 - (PROBE_INTERVAL + dope))));
10394 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10395 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10396 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10397 gen_rtx_PLUS (Pmode, sr.reg,
10398 stack_pointer_rtx)));
10401 /* Step 3: the loop
10403 while (SP != LAST_ADDR)
10405 SP = SP + PROBE_INTERVAL
10406 probe at SP
10409 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10410 values of N from 1 until it is equal to ROUNDED_SIZE. */
10412 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10415 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10416 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10418 if (size != rounded_size)
10420 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10421 plus_constant (Pmode, stack_pointer_rtx,
10422 rounded_size - size)));
10423 emit_stack_probe (stack_pointer_rtx);
10426 /* Adjust back to account for the additional first interval. */
10427 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10428 plus_constant (Pmode, stack_pointer_rtx,
10429 PROBE_INTERVAL + dope)));
10431 release_scratch_register_on_entry (&sr);
10434 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10436 /* Even if the stack pointer isn't the CFA register, we need to correctly
10437 describe the adjustments made to it, in particular differentiate the
10438 frame-related ones from the frame-unrelated ones. */
10439 if (size > 0)
10441 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10442 XVECEXP (expr, 0, 0)
10443 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10444 plus_constant (Pmode, stack_pointer_rtx, -size));
10445 XVECEXP (expr, 0, 1)
10446 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10447 plus_constant (Pmode, stack_pointer_rtx,
10448 PROBE_INTERVAL + dope + size));
10449 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10450 RTX_FRAME_RELATED_P (last) = 1;
10452 cfun->machine->fs.sp_offset += size;
10455 /* Make sure nothing is scheduled before we are done. */
10456 emit_insn (gen_blockage ());
10459 /* Adjust the stack pointer up to REG while probing it. */
10461 const char *
10462 output_adjust_stack_and_probe (rtx reg)
10464 static int labelno = 0;
10465 char loop_lab[32], end_lab[32];
10466 rtx xops[2];
10468 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10469 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10471 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10473 /* Jump to END_LAB if SP == LAST_ADDR. */
10474 xops[0] = stack_pointer_rtx;
10475 xops[1] = reg;
10476 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10477 fputs ("\tje\t", asm_out_file);
10478 assemble_name_raw (asm_out_file, end_lab);
10479 fputc ('\n', asm_out_file);
10481 /* SP = SP + PROBE_INTERVAL. */
10482 xops[1] = GEN_INT (PROBE_INTERVAL);
10483 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10485 /* Probe at SP. */
10486 xops[1] = const0_rtx;
10487 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10489 fprintf (asm_out_file, "\tjmp\t");
10490 assemble_name_raw (asm_out_file, loop_lab);
10491 fputc ('\n', asm_out_file);
10493 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10495 return "";
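/* A minimal sketch of the assembly emitted by the routine above for a
   32-bit target, assuming the scratch register ended up being %eax,
   PROBE_INTERVAL is 4096, and label numbering is hypothetical:
   .LPSRL0:
       cmpl    %eax, %esp         # stop once SP reaches LAST_ADDR
       je      .LPSRE0
       subl    $4096, %esp        # SP -= PROBE_INTERVAL
       orl     $0, (%esp)         # touch the freshly exposed page
       jmp     .LPSRL0
   .LPSRE0:
*/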
10498 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10499 inclusive. These are offsets from the current stack pointer. */
10501 static void
10502 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10504 /* See if we have a constant small number of probes to generate. If so,
10505 that's the easy case. The run-time loop is made up of 7 insns in the
10506 generic case while the compile-time loop is made up of n insns for n #
10507 of intervals. */
10508 if (size <= 7 * PROBE_INTERVAL)
10510 HOST_WIDE_INT i;
10512 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10513 it exceeds SIZE. If only one probe is needed, this will not
10514 generate any code. Then probe at FIRST + SIZE. */
10515 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10516 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10517 -(first + i)));
10519 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10520 -(first + size)));
10523 /* Otherwise, do the same as above, but in a loop. Note that we must be
10524 extra careful with variables wrapping around because we might be at
10525 the very top (or the very bottom) of the address space and we have
10526 to be able to handle this case properly; in particular, we use an
10527 equality test for the loop condition. */
10528 else
10530 HOST_WIDE_INT rounded_size, last;
10531 struct scratch_reg sr;
10533 get_scratch_register_on_entry (&sr);
10536 /* Step 1: round SIZE to the previous multiple of the interval. */
10538 rounded_size = size & -PROBE_INTERVAL;
10541 /* Step 2: compute initial and final value of the loop counter. */
10543 /* TEST_OFFSET = FIRST. */
10544 emit_move_insn (sr.reg, GEN_INT (-first));
10546 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10547 last = first + rounded_size;
10550 /* Step 3: the loop
10552 while (TEST_ADDR != LAST_ADDR)
10554 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10555 probe at TEST_ADDR
10558 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10559 until it is equal to ROUNDED_SIZE. */
10561 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10564 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10565 that SIZE is equal to ROUNDED_SIZE. */
10567 if (size != rounded_size)
10568 emit_stack_probe (plus_constant (Pmode,
10569 gen_rtx_PLUS (Pmode,
10570 stack_pointer_rtx,
10571 sr.reg),
10572 rounded_size - size));
10574 release_scratch_register_on_entry (&sr);
10577 /* Make sure nothing is scheduled before we are done. */
10578 emit_insn (gen_blockage ());
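/* Worked example for the unrolled branch above (illustrative numbers,
   PROBE_INTERVAL assumed to be 4096): FIRST == 4096 and SIZE == 12288
   satisfy SIZE <= 7 * PROBE_INTERVAL, so the loop emits probes at
   sp - 8192 and sp - 12288, and the trailing call emits the final
   probe at sp - (FIRST + SIZE) == sp - 16384.  */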
10581 /* Probe a range of stack addresses from REG to END, inclusive. These are
10582 offsets from the current stack pointer. */
10584 const char *
10585 output_probe_stack_range (rtx reg, rtx end)
10587 static int labelno = 0;
10588 char loop_lab[32], end_lab[32];
10589 rtx xops[3];
10591 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10592 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10594 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10596 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10597 xops[0] = reg;
10598 xops[1] = end;
10599 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10600 fputs ("\tje\t", asm_out_file);
10601 assemble_name_raw (asm_out_file, end_lab);
10602 fputc ('\n', asm_out_file);
10604 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10605 xops[1] = GEN_INT (PROBE_INTERVAL);
10606 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10608 /* Probe at TEST_ADDR. */
10609 xops[0] = stack_pointer_rtx;
10610 xops[1] = reg;
10611 xops[2] = const0_rtx;
10612 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10614 fprintf (asm_out_file, "\tjmp\t");
10615 assemble_name_raw (asm_out_file, loop_lab);
10616 fputc ('\n', asm_out_file);
10618 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10620 return "";
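/* A rough sketch of the loop emitted above for a 32-bit target, assuming
   REG is %eax, END is %edx and PROBE_INTERVAL is 4096 (register choice
   and label numbering are hypothetical):
   .LPSRL1:
       cmpl    %edx, %eax          # TEST_ADDR == LAST_ADDR ?
       je      .LPSRE1
       subl    $4096, %eax         # advance by one interval; the register
                                   # holds a negative offset, hence sub
       orl     $0, (%esp,%eax)     # probe at SP + TEST_ADDR
       jmp     .LPSRL1
   .LPSRE1:
*/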
10623 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10624 to be generated in correct form. */
10625 static void
10626 ix86_finalize_stack_realign_flags (void)
10628 /* Check whether stack realignment is really needed after reload, and
10629 store the result in cfun. */
10630 unsigned int incoming_stack_boundary
10631 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10632 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10633 unsigned int stack_realign = (incoming_stack_boundary
10634 < (crtl->is_leaf
10635 ? crtl->max_used_stack_slot_alignment
10636 : crtl->stack_alignment_needed));
10638 if (crtl->stack_realign_finalized)
10640 /* After stack_realign_needed is finalized, we can no longer
10641 change it. */
10642 gcc_assert (crtl->stack_realign_needed == stack_realign);
10643 return;
10646 /* If the only reason for frame_pointer_needed is that we conservatively
10647 assumed stack realignment might be needed, but in the end nothing that
10648 needed the stack alignment had been spilled, clear frame_pointer_needed
10649 and say we don't need stack realignment. */
10650 if (stack_realign
10651 && frame_pointer_needed
10652 && crtl->is_leaf
10653 && flag_omit_frame_pointer
10654 && crtl->sp_is_unchanging
10655 && !ix86_current_function_calls_tls_descriptor
10656 && !crtl->accesses_prior_frames
10657 && !cfun->calls_alloca
10658 && !crtl->calls_eh_return
10659 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10660 && !ix86_frame_pointer_required ()
10661 && get_frame_size () == 0
10662 && ix86_nsaved_sseregs () == 0
10663 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10665 HARD_REG_SET set_up_by_prologue, prologue_used;
10666 basic_block bb;
10668 CLEAR_HARD_REG_SET (prologue_used);
10669 CLEAR_HARD_REG_SET (set_up_by_prologue);
10670 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10671 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10672 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10673 HARD_FRAME_POINTER_REGNUM);
10674 FOR_EACH_BB_FN (bb, cfun)
10676 rtx insn;
10677 FOR_BB_INSNS (bb, insn)
10678 if (NONDEBUG_INSN_P (insn)
10679 && requires_stack_frame_p (insn, prologue_used,
10680 set_up_by_prologue))
10682 crtl->stack_realign_needed = stack_realign;
10683 crtl->stack_realign_finalized = true;
10684 return;
10688 /* If drap has been set, but it actually isn't live at the start
10689 of the function, there is no reason to set it up. */
10690 if (crtl->drap_reg)
10692 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10693 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10695 crtl->drap_reg = NULL_RTX;
10696 crtl->need_drap = false;
10699 else
10700 cfun->machine->no_drap_save_restore = true;
10702 frame_pointer_needed = false;
10703 stack_realign = false;
10704 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10705 crtl->stack_alignment_needed = incoming_stack_boundary;
10706 crtl->stack_alignment_estimated = incoming_stack_boundary;
10707 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10708 crtl->preferred_stack_boundary = incoming_stack_boundary;
10709 df_finish_pass (true);
10710 df_scan_alloc (NULL);
10711 df_scan_blocks ();
10712 df_compute_regs_ever_live (true);
10713 df_analyze ();
10716 crtl->stack_realign_needed = stack_realign;
10717 crtl->stack_realign_finalized = true;
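/* Illustration of the test above (boundaries are in bits): with
   -mincoming-stack-boundary=2 the incoming boundary is 32 bits, so a
   leaf function whose most aligned stack slot needs 128 bits (say, an
   SSE vector) computes
     stack_realign = 32 < 128   =>  true
   and the prologue will realign; if nothing actually needed the larger
   alignment, the block above instead cancels the realignment and drops
   the frame pointer.  */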
10720 /* Expand the prologue into a bunch of separate insns. */
10722 void
10723 ix86_expand_prologue (void)
10725 struct machine_function *m = cfun->machine;
10726 rtx insn, t;
10727 bool pic_reg_used;
10728 struct ix86_frame frame;
10729 HOST_WIDE_INT allocate;
10730 bool int_registers_saved;
10731 bool sse_registers_saved;
10733 ix86_finalize_stack_realign_flags ();
10735 /* DRAP should not coexist with stack_realign_fp */
10736 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10738 memset (&m->fs, 0, sizeof (m->fs));
10740 /* Initialize CFA state for before the prologue. */
10741 m->fs.cfa_reg = stack_pointer_rtx;
10742 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10744 /* Track SP offset to the CFA. We continue tracking this after we've
10745 swapped the CFA register away from SP. In the case of re-alignment
10746 this is fudged; we're interested in offsets within the local frame. */
10747 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10748 m->fs.sp_valid = true;
10750 ix86_compute_frame_layout (&frame);
10752 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10754 /* We should have already generated an error for any use of
10755 ms_hook on a nested function. */
10756 gcc_checking_assert (!ix86_static_chain_on_stack);
10758 /* Check whether profiling is active and we shall use the
10759 profiling-before-prologue variant. If so, sorry. */
10760 if (crtl->profile && flag_fentry != 0)
10761 sorry ("ms_hook_prologue attribute isn%'t compatible "
10762 "with -mfentry for 32-bit");
10764 /* In ix86_asm_output_function_label we emitted:
10765 8b ff movl.s %edi,%edi
10766 55 push %ebp
10767 8b ec movl.s %esp,%ebp
10769 This matches the hookable function prologue in Win32 API
10770 functions in Microsoft Windows XP Service Pack 2 and newer.
10771 Wine uses this to enable Windows apps to hook the Win32 API
10772 functions provided by Wine.
10774 What that means is that we've already set up the frame pointer. */
10776 if (frame_pointer_needed
10777 && !(crtl->drap_reg && crtl->stack_realign_needed))
10779 rtx push, mov;
10781 /* We've decided to use the frame pointer already set up.
10782 Describe this to the unwinder by pretending that both
10783 push and mov insns happen right here.
10785 Putting the unwind info here at the end of the ms_hook
10786 is done so that we can make absolutely certain we get
10787 the required byte sequence at the start of the function,
10788 rather than relying on an assembler that can produce
10789 the exact encoding required.
10791 However it does mean (in the unpatched case) that we have
10792 a 1 insn window where the asynchronous unwind info is
10793 incorrect. However, if we placed the unwind info at
10794 its correct location we would have incorrect unwind info
10795 in the patched case. Which is probably all moot since
10796 I don't expect Wine generates dwarf2 unwind info for the
10797 system libraries that use this feature. */
10799 insn = emit_insn (gen_blockage ());
10801 push = gen_push (hard_frame_pointer_rtx);
10802 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10803 stack_pointer_rtx);
10804 RTX_FRAME_RELATED_P (push) = 1;
10805 RTX_FRAME_RELATED_P (mov) = 1;
10807 RTX_FRAME_RELATED_P (insn) = 1;
10808 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10809 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10811 /* Note that gen_push incremented m->fs.cfa_offset, even
10812 though we didn't emit the push insn here. */
10813 m->fs.cfa_reg = hard_frame_pointer_rtx;
10814 m->fs.fp_offset = m->fs.cfa_offset;
10815 m->fs.fp_valid = true;
10817 else
10819 /* The frame pointer is not needed so pop %ebp again.
10820 This leaves us with a pristine state. */
10821 emit_insn (gen_pop (hard_frame_pointer_rtx));
10825 /* The first insn of a function that accepts its static chain on the
10826 stack is to push the register that would be filled in by a direct
10827 call. This insn will be skipped by the trampoline. */
10828 else if (ix86_static_chain_on_stack)
10830 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10831 emit_insn (gen_blockage ());
10833 /* We don't want to interpret this push insn as a register save,
10834 only as a stack adjustment. The real copy of the register as
10835 a save will be done later, if needed. */
10836 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10837 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10838 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10839 RTX_FRAME_RELATED_P (insn) = 1;
10842 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10843 DRAP is needed and stack realignment is really needed after reload. */
10844 if (stack_realign_drap)
10846 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10848 /* Only need to push parameter pointer reg if it is caller saved. */
10849 if (!call_used_regs[REGNO (crtl->drap_reg)])
10851 /* Push arg pointer reg */
10852 insn = emit_insn (gen_push (crtl->drap_reg));
10853 RTX_FRAME_RELATED_P (insn) = 1;
10856 /* Grab the argument pointer. */
10857 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10858 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10859 RTX_FRAME_RELATED_P (insn) = 1;
10860 m->fs.cfa_reg = crtl->drap_reg;
10861 m->fs.cfa_offset = 0;
10863 /* Align the stack. */
10864 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10865 stack_pointer_rtx,
10866 GEN_INT (-align_bytes)));
10867 RTX_FRAME_RELATED_P (insn) = 1;
10869 /* Replicate the return address on the stack so that return
10870 address can be reached via (argp - 1) slot. This is needed
10871 to implement macro RETURN_ADDR_RTX and intrinsic function
10872 expand_builtin_return_addr etc. */
10873 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10874 t = gen_frame_mem (word_mode, t);
10875 insn = emit_insn (gen_push (t));
10876 RTX_FRAME_RELATED_P (insn) = 1;
10878 /* For the purposes of frame and register save area addressing,
10879 we've started over with a new frame. */
10880 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10881 m->fs.realigned = true;
10884 int_registers_saved = (frame.nregs == 0);
10885 sse_registers_saved = (frame.nsseregs == 0);
10887 if (frame_pointer_needed && !m->fs.fp_valid)
10889 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10890 slower on all targets. Also sdb doesn't like it. */
10891 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10892 RTX_FRAME_RELATED_P (insn) = 1;
10894 /* Push registers now, before setting the frame pointer
10895 on SEH target. */
10896 if (!int_registers_saved
10897 && TARGET_SEH
10898 && !frame.save_regs_using_mov)
10900 ix86_emit_save_regs ();
10901 int_registers_saved = true;
10902 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10905 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10907 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10908 RTX_FRAME_RELATED_P (insn) = 1;
10910 if (m->fs.cfa_reg == stack_pointer_rtx)
10911 m->fs.cfa_reg = hard_frame_pointer_rtx;
10912 m->fs.fp_offset = m->fs.sp_offset;
10913 m->fs.fp_valid = true;
10917 if (!int_registers_saved)
10919 /* If saving registers via PUSH, do so now. */
10920 if (!frame.save_regs_using_mov)
10922 ix86_emit_save_regs ();
10923 int_registers_saved = true;
10924 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10927 /* When using red zone we may start register saving before allocating
10928 the stack frame saving one cycle of the prologue. However, avoid
10929 doing this if we have to probe the stack; at least on x86_64 the
10930 stack probe can turn into a call that clobbers a red zone location. */
10931 else if (ix86_using_red_zone ()
10932 && (! TARGET_STACK_PROBE
10933 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10935 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10936 int_registers_saved = true;
10940 if (stack_realign_fp)
10942 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10943 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10945 /* The computation of the size of the re-aligned stack frame means
10946 that we must allocate the size of the register save area before
10947 performing the actual alignment. Otherwise we cannot guarantee
10948 that there's enough storage above the realignment point. */
10949 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10950 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10951 GEN_INT (m->fs.sp_offset
10952 - frame.sse_reg_save_offset),
10953 -1, false);
10955 /* Align the stack. */
10956 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10957 stack_pointer_rtx,
10958 GEN_INT (-align_bytes)));
10960 /* For the purposes of register save area addressing, the stack
10961 pointer is no longer valid. As for the value of sp_offset,
10962 see ix86_compute_frame_layout, which we need to match in order
10963 to pass verification of stack_pointer_offset at the end. */
10964 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10965 m->fs.sp_valid = false;
10968 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10970 if (flag_stack_usage_info)
10972 /* We start to count from ARG_POINTER. */
10973 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10975 /* If it was realigned, take into account the fake frame. */
10976 if (stack_realign_drap)
10978 if (ix86_static_chain_on_stack)
10979 stack_size += UNITS_PER_WORD;
10981 if (!call_used_regs[REGNO (crtl->drap_reg)])
10982 stack_size += UNITS_PER_WORD;
10984 /* This over-estimates by 1 minimal-stack-alignment-unit but
10985 mitigates that by counting in the new return address slot. */
10986 current_function_dynamic_stack_size
10987 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10990 current_function_static_stack_size = stack_size;
10993 /* On SEH target with very large frame size, allocate an area to save
10994 SSE registers (as the very large allocation won't be described). */
10995 if (TARGET_SEH
10996 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10997 && !sse_registers_saved)
10999 HOST_WIDE_INT sse_size =
11000 frame.sse_reg_save_offset - frame.reg_save_offset;
11002 gcc_assert (int_registers_saved);
11004 /* No need to do stack checking as the area will be immediately
11005 written. */
11006 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11007 GEN_INT (-sse_size), -1,
11008 m->fs.cfa_reg == stack_pointer_rtx);
11009 allocate -= sse_size;
11010 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11011 sse_registers_saved = true;
11014 /* The stack has already been decremented by the instruction calling us
11015 so probe if the size is non-negative to preserve the protection area. */
11016 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11018 /* We expect the registers to be saved when probes are used. */
11019 gcc_assert (int_registers_saved);
11021 if (STACK_CHECK_MOVING_SP)
11023 if (!(crtl->is_leaf && !cfun->calls_alloca
11024 && allocate <= PROBE_INTERVAL))
11026 ix86_adjust_stack_and_probe (allocate);
11027 allocate = 0;
11030 else
11032 HOST_WIDE_INT size = allocate;
11034 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11035 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11037 if (TARGET_STACK_PROBE)
11039 if (crtl->is_leaf && !cfun->calls_alloca)
11041 if (size > PROBE_INTERVAL)
11042 ix86_emit_probe_stack_range (0, size);
11044 else
11045 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11047 else
11049 if (crtl->is_leaf && !cfun->calls_alloca)
11051 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11052 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11053 size - STACK_CHECK_PROTECT);
11055 else
11056 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11061 if (allocate == 0)
11063 else if (!ix86_target_stack_probe ()
11064 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11066 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11067 GEN_INT (-allocate), -1,
11068 m->fs.cfa_reg == stack_pointer_rtx);
11070 else
11072 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11073 rtx r10 = NULL;
11074 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11075 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11076 bool eax_live = ix86_eax_live_at_start_p ();
11077 bool r10_live = false;
11079 if (TARGET_64BIT)
11080 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11082 if (eax_live)
11084 insn = emit_insn (gen_push (eax));
11085 allocate -= UNITS_PER_WORD;
11086 /* Note that SEH directives need to continue tracking the stack
11087 pointer even after the frame pointer has been set up. */
11088 if (sp_is_cfa_reg || TARGET_SEH)
11090 if (sp_is_cfa_reg)
11091 m->fs.cfa_offset += UNITS_PER_WORD;
11092 RTX_FRAME_RELATED_P (insn) = 1;
11093 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11094 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11095 plus_constant (Pmode, stack_pointer_rtx,
11096 -UNITS_PER_WORD)));
11100 if (r10_live)
11102 r10 = gen_rtx_REG (Pmode, R10_REG);
11103 insn = emit_insn (gen_push (r10));
11104 allocate -= UNITS_PER_WORD;
11105 if (sp_is_cfa_reg || TARGET_SEH)
11107 if (sp_is_cfa_reg)
11108 m->fs.cfa_offset += UNITS_PER_WORD;
11109 RTX_FRAME_RELATED_P (insn) = 1;
11110 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11111 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11112 plus_constant (Pmode, stack_pointer_rtx,
11113 -UNITS_PER_WORD)));
11117 emit_move_insn (eax, GEN_INT (allocate));
11118 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11120 /* Use the fact that AX still contains ALLOCATE. */
11121 adjust_stack_insn = (Pmode == DImode
11122 ? gen_pro_epilogue_adjust_stack_di_sub
11123 : gen_pro_epilogue_adjust_stack_si_sub);
11125 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11126 stack_pointer_rtx, eax));
11128 if (sp_is_cfa_reg || TARGET_SEH)
11130 if (sp_is_cfa_reg)
11131 m->fs.cfa_offset += allocate;
11132 RTX_FRAME_RELATED_P (insn) = 1;
11133 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11134 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11135 plus_constant (Pmode, stack_pointer_rtx,
11136 -allocate)));
11138 m->fs.sp_offset += allocate;
11140 /* Use stack_pointer_rtx for relative addressing so that code
11141 works for realigned stack, too. */
11142 if (r10_live && eax_live)
11144 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11145 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11146 gen_frame_mem (word_mode, t));
11147 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11148 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11149 gen_frame_mem (word_mode, t));
11151 else if (eax_live || r10_live)
11153 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11154 emit_move_insn (gen_rtx_REG (word_mode,
11155 (eax_live ? AX_REG : R10_REG)),
11156 gen_frame_mem (word_mode, t));
11159 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11161 /* If we haven't already set up the frame pointer, do so now. */
11162 if (frame_pointer_needed && !m->fs.fp_valid)
11164 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11165 GEN_INT (frame.stack_pointer_offset
11166 - frame.hard_frame_pointer_offset));
11167 insn = emit_insn (insn);
11168 RTX_FRAME_RELATED_P (insn) = 1;
11169 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11171 if (m->fs.cfa_reg == stack_pointer_rtx)
11172 m->fs.cfa_reg = hard_frame_pointer_rtx;
11173 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11174 m->fs.fp_valid = true;
11177 if (!int_registers_saved)
11178 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11179 if (!sse_registers_saved)
11180 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11182 pic_reg_used = false;
11183 /* We don't use pic-register for pe-coff target. */
11184 if (pic_offset_table_rtx
11185 && !TARGET_PECOFF
11186 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11187 || crtl->profile))
11189 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11191 if (alt_pic_reg_used != INVALID_REGNUM)
11192 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11194 pic_reg_used = true;
11197 if (pic_reg_used)
11199 if (TARGET_64BIT)
11201 if (ix86_cmodel == CM_LARGE_PIC)
11203 rtx label, tmp_reg;
11205 gcc_assert (Pmode == DImode);
11206 label = gen_label_rtx ();
11207 emit_label (label);
11208 LABEL_PRESERVE_P (label) = 1;
11209 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11210 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11211 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11212 label));
11213 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11214 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11215 pic_offset_table_rtx, tmp_reg));
11217 else
11218 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11220 else
11222 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11223 RTX_FRAME_RELATED_P (insn) = 1;
11224 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11228 /* In the pic_reg_used case, make sure that the got load isn't deleted
11229 when mcount needs it. Blockage to avoid call movement across mcount
11230 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11231 note. */
11232 if (crtl->profile && !flag_fentry && pic_reg_used)
11233 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11235 if (crtl->drap_reg && !crtl->stack_realign_needed)
11237 /* vDRAP is set up, but after reload it turns out stack realignment
11238 isn't necessary; here we emit the prologue to set up DRAP
11239 without the stack realignment adjustment. */
11240 t = choose_baseaddr (0);
11241 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11244 /* Prevent instructions from being scheduled into register save push
11245 sequence when access to the redzone area is done through frame pointer.
11246 The offset between the frame pointer and the stack pointer is calculated
11247 relative to the value of the stack pointer at the end of the function
11248 prologue, and moving instructions that access redzone area via frame
11249 pointer inside push sequence violates this assumption. */
11250 if (frame_pointer_needed && frame.red_zone_size)
11251 emit_insn (gen_memory_blockage ());
11253 /* Emit cld instruction if stringops are used in the function. */
11254 if (TARGET_CLD && ix86_current_function_needs_cld)
11255 emit_insn (gen_cld ());
11257 /* SEH requires that the prologue end within 256 bytes of the start of
11258 the function. Prevent instruction schedules that would extend that.
11259 Further, prevent alloca modifications to the stack pointer from being
11260 combined with prologue modifications. */
11261 if (TARGET_SEH)
11262 emit_insn (gen_prologue_use (stack_pointer_rtx));
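/* One common shape of the code emitted by ix86_expand_prologue, shown
   only as a hedged sketch for a 64-bit function that needs a frame
   pointer, saves %rbx and allocates 40 bytes of locals (the exact
   output depends on the frame layout, realignment, stack probing and
   target flags handled above):
       pushq   %rbp
       movq    %rsp, %rbp
       pushq   %rbx
       subq    $40, %rsp
*/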
11265 /* Emit code to restore REG using a POP insn. */
11267 static void
11268 ix86_emit_restore_reg_using_pop (rtx reg)
11270 struct machine_function *m = cfun->machine;
11271 rtx insn = emit_insn (gen_pop (reg));
11273 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11274 m->fs.sp_offset -= UNITS_PER_WORD;
11276 if (m->fs.cfa_reg == crtl->drap_reg
11277 && REGNO (reg) == REGNO (crtl->drap_reg))
11279 /* Previously we'd represented the CFA as an expression
11280 like *(%ebp - 8). We've just popped that value from
11281 the stack, which means we need to reset the CFA to
11282 the drap register. This will remain until we restore
11283 the stack pointer. */
11284 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11285 RTX_FRAME_RELATED_P (insn) = 1;
11287 /* This means that the DRAP register is valid for addressing too. */
11288 m->fs.drap_valid = true;
11289 return;
11292 if (m->fs.cfa_reg == stack_pointer_rtx)
11294 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11295 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11296 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11297 RTX_FRAME_RELATED_P (insn) = 1;
11299 m->fs.cfa_offset -= UNITS_PER_WORD;
11302 /* When the frame pointer is the CFA, and we pop it, we are
11303 swapping back to the stack pointer as the CFA. This happens
11304 for stack frames that don't allocate other data, so we assume
11305 the stack pointer is now pointing at the return address, i.e.
11306 the function entry state, which makes the offset be 1 word. */
11307 if (reg == hard_frame_pointer_rtx)
11309 m->fs.fp_valid = false;
11310 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11312 m->fs.cfa_reg = stack_pointer_rtx;
11313 m->fs.cfa_offset -= UNITS_PER_WORD;
11315 add_reg_note (insn, REG_CFA_DEF_CFA,
11316 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11317 GEN_INT (m->fs.cfa_offset)));
11318 RTX_FRAME_RELATED_P (insn) = 1;
11323 /* Emit code to restore saved registers using POP insns. */
11325 static void
11326 ix86_emit_restore_regs_using_pop (void)
11328 unsigned int regno;
11330 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11331 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11332 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11335 /* Emit code and notes for the LEAVE instruction. */
11337 static void
11338 ix86_emit_leave (void)
11340 struct machine_function *m = cfun->machine;
11341 rtx insn = emit_insn (ix86_gen_leave ());
11343 ix86_add_queued_cfa_restore_notes (insn);
11345 gcc_assert (m->fs.fp_valid);
11346 m->fs.sp_valid = true;
11347 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11348 m->fs.fp_valid = false;
11350 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11352 m->fs.cfa_reg = stack_pointer_rtx;
11353 m->fs.cfa_offset = m->fs.sp_offset;
11355 add_reg_note (insn, REG_CFA_DEF_CFA,
11356 plus_constant (Pmode, stack_pointer_rtx,
11357 m->fs.sp_offset));
11358 RTX_FRAME_RELATED_P (insn) = 1;
11360 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11361 m->fs.fp_offset);
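/* The leave instruction used above is architecturally equivalent to
       mov     %rbp, %rsp      (resp. %ebp/%esp in 32-bit mode)
       pop     %rbp
   which is why sp_valid becomes true and sp_offset becomes
   fp_offset - UNITS_PER_WORD in the state tracking above.  */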
11364 /* Emit code to restore saved registers using MOV insns.
11365 First register is restored from CFA - CFA_OFFSET. */
11366 static void
11367 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11368 bool maybe_eh_return)
11370 struct machine_function *m = cfun->machine;
11371 unsigned int regno;
11373 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11374 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11376 rtx reg = gen_rtx_REG (word_mode, regno);
11377 rtx insn, mem;
11379 mem = choose_baseaddr (cfa_offset);
11380 mem = gen_frame_mem (word_mode, mem);
11381 insn = emit_move_insn (reg, mem);
11383 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11385 /* Previously we'd represented the CFA as an expression
11386 like *(%ebp - 8). We've just popped that value from
11387 the stack, which means we need to reset the CFA to
11388 the drap register. This will remain until we restore
11389 the stack pointer. */
11390 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11391 RTX_FRAME_RELATED_P (insn) = 1;
11393 /* This means that the DRAP register is valid for addressing. */
11394 m->fs.drap_valid = true;
11396 else
11397 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11399 cfa_offset -= UNITS_PER_WORD;
11403 /* Emit code to restore saved registers using MOV insns.
11404 First register is restored from CFA - CFA_OFFSET. */
11405 static void
11406 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11407 bool maybe_eh_return)
11409 unsigned int regno;
11411 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11412 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11414 rtx reg = gen_rtx_REG (V4SFmode, regno);
11415 rtx mem;
11417 mem = choose_baseaddr (cfa_offset);
11418 mem = gen_rtx_MEM (V4SFmode, mem);
11419 set_mem_align (mem, 128);
11420 emit_move_insn (reg, mem);
11422 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11424 cfa_offset -= 16;
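/* Rough sketch of what the loop above emits, assuming an ms_abi
   function that saved %xmm6 and %xmm7 and whose base address resolves
   to the stack pointer (offsets are illustrative):
       movaps  32(%rsp), %xmm6
       movaps  48(%rsp), %xmm7
*/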
11428 /* Restore function stack, frame, and registers. */
11430 void
11431 ix86_expand_epilogue (int style)
11433 struct machine_function *m = cfun->machine;
11434 struct machine_frame_state frame_state_save = m->fs;
11435 struct ix86_frame frame;
11436 bool restore_regs_via_mov;
11437 bool using_drap;
11439 ix86_finalize_stack_realign_flags ();
11440 ix86_compute_frame_layout (&frame);
11442 m->fs.sp_valid = (!frame_pointer_needed
11443 || (crtl->sp_is_unchanging
11444 && !stack_realign_fp));
11445 gcc_assert (!m->fs.sp_valid
11446 || m->fs.sp_offset == frame.stack_pointer_offset);
11448 /* The FP must be valid if the frame pointer is present. */
11449 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11450 gcc_assert (!m->fs.fp_valid
11451 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11453 /* We must have *some* valid pointer to the stack frame. */
11454 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11456 /* The DRAP is never valid at this point. */
11457 gcc_assert (!m->fs.drap_valid);
11459 /* See the comment about red zone and frame
11460 pointer usage in ix86_expand_prologue. */
11461 if (frame_pointer_needed && frame.red_zone_size)
11462 emit_insn (gen_memory_blockage ());
11464 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11465 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11467 /* Determine the CFA offset of the end of the red-zone. */
11468 m->fs.red_zone_offset = 0;
11469 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11471 /* The red-zone begins below the return address. */
11472 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11474 /* When the register save area is in the aligned portion of
11475 the stack, determine the maximum runtime displacement that
11476 matches up with the aligned frame. */
11477 if (stack_realign_drap)
11478 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11479 + UNITS_PER_WORD);
11482 /* Special care must be taken for the normal return case of a function
11483 using eh_return: the eax and edx registers are marked as saved, but
11484 not restored along this path. Adjust the save location to match. */
11485 if (crtl->calls_eh_return && style != 2)
11486 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11488 /* EH_RETURN requires the use of moves to function properly. */
11489 if (crtl->calls_eh_return)
11490 restore_regs_via_mov = true;
11491 /* SEH requires the use of pops to identify the epilogue. */
11492 else if (TARGET_SEH)
11493 restore_regs_via_mov = false;
11494 /* If we're only restoring one register and sp is not valid then
11495 use a move instruction to restore the register, since it's
11496 less work than reloading sp and popping the register. */
11497 else if (!m->fs.sp_valid && frame.nregs <= 1)
11498 restore_regs_via_mov = true;
11499 else if (TARGET_EPILOGUE_USING_MOVE
11500 && cfun->machine->use_fast_prologue_epilogue
11501 && (frame.nregs > 1
11502 || m->fs.sp_offset != frame.reg_save_offset))
11503 restore_regs_via_mov = true;
11504 else if (frame_pointer_needed
11505 && !frame.nregs
11506 && m->fs.sp_offset != frame.reg_save_offset)
11507 restore_regs_via_mov = true;
11508 else if (frame_pointer_needed
11509 && TARGET_USE_LEAVE
11510 && cfun->machine->use_fast_prologue_epilogue
11511 && frame.nregs == 1)
11512 restore_regs_via_mov = true;
11513 else
11514 restore_regs_via_mov = false;
11516 if (restore_regs_via_mov || frame.nsseregs)
11518 /* Ensure that the entire register save area is addressable via
11519 the stack pointer, if we will restore via sp. */
11520 if (TARGET_64BIT
11521 && m->fs.sp_offset > 0x7fffffff
11522 && !(m->fs.fp_valid || m->fs.drap_valid)
11523 && (frame.nsseregs + frame.nregs) != 0)
11525 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11526 GEN_INT (m->fs.sp_offset
11527 - frame.sse_reg_save_offset),
11528 style,
11529 m->fs.cfa_reg == stack_pointer_rtx);
11533 /* If there are any SSE registers to restore, then we have to do it
11534 via moves, since there's obviously no pop for SSE regs. */
11535 if (frame.nsseregs)
11536 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11537 style == 2);
11539 if (restore_regs_via_mov)
11541 rtx t;
11543 if (frame.nregs)
11544 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11546 /* eh_return epilogues need %ecx added to the stack pointer. */
11547 if (style == 2)
11549 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11551 /* Stack align doesn't work with eh_return. */
11552 gcc_assert (!stack_realign_drap);
11553 /* Neither do regparm nested functions. */
11554 gcc_assert (!ix86_static_chain_on_stack);
11556 if (frame_pointer_needed)
11558 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11559 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11560 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11562 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11563 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11565 /* Note that we use SA as a temporary CFA, as the return
11566 address is at the proper place relative to it. We
11567 pretend this happens at the FP restore insn because
11568 prior to this insn the FP would be stored at the wrong
11569 offset relative to SA, and after this insn we have no
11570 other reasonable register to use for the CFA. We don't
11571 bother resetting the CFA to the SP for the duration of
11572 the return insn. */
11573 add_reg_note (insn, REG_CFA_DEF_CFA,
11574 plus_constant (Pmode, sa, UNITS_PER_WORD));
11575 ix86_add_queued_cfa_restore_notes (insn);
11576 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11577 RTX_FRAME_RELATED_P (insn) = 1;
11579 m->fs.cfa_reg = sa;
11580 m->fs.cfa_offset = UNITS_PER_WORD;
11581 m->fs.fp_valid = false;
11583 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11584 const0_rtx, style, false);
11586 else
11588 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11589 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11590 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11591 ix86_add_queued_cfa_restore_notes (insn);
11593 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11594 if (m->fs.cfa_offset != UNITS_PER_WORD)
11596 m->fs.cfa_offset = UNITS_PER_WORD;
11597 add_reg_note (insn, REG_CFA_DEF_CFA,
11598 plus_constant (Pmode, stack_pointer_rtx,
11599 UNITS_PER_WORD));
11600 RTX_FRAME_RELATED_P (insn) = 1;
11603 m->fs.sp_offset = UNITS_PER_WORD;
11604 m->fs.sp_valid = true;
11607 else
11609 /* SEH requires that the function end with (1) a stack adjustment
11610 if necessary, (2) a sequence of pops, and (3) a return or
11611 jump instruction. Prevent insns from the function body from
11612 being scheduled into this sequence. */
11613 if (TARGET_SEH)
11615 /* Prevent a catch region from being adjacent to the standard
11616 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11617 several other flags that would be interesting to test are
11618 set up yet. */
11619 if (flag_non_call_exceptions)
11620 emit_insn (gen_nops (const1_rtx));
11621 else
11622 emit_insn (gen_blockage ());
11625 /* First step is to deallocate the stack frame so that we can
11626 pop the registers. Also do it on SEH target for very large
11627 frame as the emitted instructions aren't allowed by the ABI in
11628 epilogues. */
11629 if (!m->fs.sp_valid
11630 || (TARGET_SEH
11631 && (m->fs.sp_offset - frame.reg_save_offset
11632 >= SEH_MAX_FRAME_SIZE)))
11634 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11635 GEN_INT (m->fs.fp_offset
11636 - frame.reg_save_offset),
11637 style, false);
11639 else if (m->fs.sp_offset != frame.reg_save_offset)
11641 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11642 GEN_INT (m->fs.sp_offset
11643 - frame.reg_save_offset),
11644 style,
11645 m->fs.cfa_reg == stack_pointer_rtx);
11648 ix86_emit_restore_regs_using_pop ();
11651 /* If we used a frame pointer and haven't already got rid of it,
11652 then do so now. */
11653 if (m->fs.fp_valid)
11655 /* If the stack pointer is valid and pointing at the frame
11656 pointer store address, then we only need a pop. */
11657 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11658 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11659 /* Leave results in shorter dependency chains on CPUs that are
11660 able to grok it fast. */
11661 else if (TARGET_USE_LEAVE
11662 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11663 || !cfun->machine->use_fast_prologue_epilogue)
11664 ix86_emit_leave ();
11665 else
11667 pro_epilogue_adjust_stack (stack_pointer_rtx,
11668 hard_frame_pointer_rtx,
11669 const0_rtx, style, !using_drap);
11670 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11674 if (using_drap)
11676 int param_ptr_offset = UNITS_PER_WORD;
11677 rtx insn;
11679 gcc_assert (stack_realign_drap);
11681 if (ix86_static_chain_on_stack)
11682 param_ptr_offset += UNITS_PER_WORD;
11683 if (!call_used_regs[REGNO (crtl->drap_reg)])
11684 param_ptr_offset += UNITS_PER_WORD;
11686 insn = emit_insn (gen_rtx_SET
11687 (VOIDmode, stack_pointer_rtx,
11688 gen_rtx_PLUS (Pmode,
11689 crtl->drap_reg,
11690 GEN_INT (-param_ptr_offset))));
11691 m->fs.cfa_reg = stack_pointer_rtx;
11692 m->fs.cfa_offset = param_ptr_offset;
11693 m->fs.sp_offset = param_ptr_offset;
11694 m->fs.realigned = false;
11696 add_reg_note (insn, REG_CFA_DEF_CFA,
11697 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11698 GEN_INT (param_ptr_offset)));
11699 RTX_FRAME_RELATED_P (insn) = 1;
11701 if (!call_used_regs[REGNO (crtl->drap_reg)])
11702 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11705 /* At this point the stack pointer must be valid, and we must have
11706 restored all of the registers. We may not have deallocated the
11707 entire stack frame. We've delayed this until now because it may
11708 be possible to merge the local stack deallocation with the
11709 deallocation forced by ix86_static_chain_on_stack. */
11710 gcc_assert (m->fs.sp_valid);
11711 gcc_assert (!m->fs.fp_valid);
11712 gcc_assert (!m->fs.realigned);
11713 if (m->fs.sp_offset != UNITS_PER_WORD)
11715 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11716 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11717 style, true);
11719 else
11720 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11722 /* Sibcall epilogues don't want a return instruction. */
11723 if (style == 0)
11725 m->fs = frame_state_save;
11726 return;
11729 if (crtl->args.pops_args && crtl->args.size)
11731 rtx popc = GEN_INT (crtl->args.pops_args);
11733 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11734 address, do explicit add, and jump indirectly to the caller. */
11736 if (crtl->args.pops_args >= 65536)
11738 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11739 rtx insn;
11741 /* There is no "pascal" calling convention in any 64bit ABI. */
11742 gcc_assert (!TARGET_64BIT);
11744 insn = emit_insn (gen_pop (ecx));
11745 m->fs.cfa_offset -= UNITS_PER_WORD;
11746 m->fs.sp_offset -= UNITS_PER_WORD;
11748 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11749 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11750 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11751 add_reg_note (insn, REG_CFA_REGISTER,
11752 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11753 RTX_FRAME_RELATED_P (insn) = 1;
11755 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11756 popc, -1, true);
11757 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11759 else
11760 emit_jump_insn (gen_simple_return_pop_internal (popc));
11762 else
11763 emit_jump_insn (gen_simple_return_internal ());
11765 /* Restore the state back to the state from the prologue,
11766 so that it's correct for the next epilogue. */
11767 m->fs = frame_state_save;
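/* Two hedged examples of epilogues this function can produce.  A small
   frame-pointer function with 8 bytes of callee-popped arguments ends
   with:
       leave
       ret     $8
   The crtl->args.pops_args >= 65536 path above cannot use "ret $n"
   (the instruction's immediate is only 16 bits wide), so it ends with:
       popl    %ecx               # fetch the return address
       addl    $65536, %esp       # drop the argument block
       jmp     *%ecx
*/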
11770 /* Reset from the function's potential modifications. */
11772 static void
11773 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11774 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11776 if (pic_offset_table_rtx)
11777 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11778 #if TARGET_MACHO
11779 /* Mach-O doesn't support labels at the end of objects, so if
11780 it looks like we might want one, insert a NOP. */
11782 rtx insn = get_last_insn ();
11783 rtx deleted_debug_label = NULL_RTX;
11784 while (insn
11785 && NOTE_P (insn)
11786 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11788 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11789 notes; instead set their CODE_LABEL_NUMBER to -1, since
11790 otherwise there would be code generation differences
11791 between -g and -g0. */
11792 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11793 deleted_debug_label = insn;
11794 insn = PREV_INSN (insn);
11796 if (insn
11797 && (LABEL_P (insn)
11798 || (NOTE_P (insn)
11799 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11800 fputs ("\tnop\n", file);
11801 else if (deleted_debug_label)
11802 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11803 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11804 CODE_LABEL_NUMBER (insn) = -1;
11806 #endif
11810 /* Return a scratch register to use in the split stack prologue. The
11811 split stack prologue is used for -fsplit-stack. It is the first
11812 instructions in the function, even before the regular prologue.
11813 The scratch register can be any caller-saved register which is not
11814 used for parameters or for the static chain. */
11816 static unsigned int
11817 split_stack_prologue_scratch_regno (void)
11819 if (TARGET_64BIT)
11820 return R11_REG;
11821 else
11823 bool is_fastcall, is_thiscall;
11824 int regparm;
11826 is_fastcall = (lookup_attribute ("fastcall",
11827 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11828 != NULL);
11829 is_thiscall = (lookup_attribute ("thiscall",
11830 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11831 != NULL);
11832 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11834 if (is_fastcall)
11836 if (DECL_STATIC_CHAIN (cfun->decl))
11838 sorry ("-fsplit-stack does not support fastcall with "
11839 "nested function");
11840 return INVALID_REGNUM;
11842 return AX_REG;
11844 else if (is_thiscall)
11846 if (!DECL_STATIC_CHAIN (cfun->decl))
11847 return DX_REG;
11848 return AX_REG;
11850 else if (regparm < 3)
11852 if (!DECL_STATIC_CHAIN (cfun->decl))
11853 return CX_REG;
11854 else
11856 if (regparm >= 2)
11858 sorry ("-fsplit-stack does not support 2 register "
11859 "parameters for a nested function");
11860 return INVALID_REGNUM;
11862 return DX_REG;
11865 else
11867 /* FIXME: We could make this work by pushing a register
11868 around the addition and comparison. */
11869 sorry ("-fsplit-stack does not support 3 register parameters");
11870 return INVALID_REGNUM;
11875 /* A SYMBOL_REF for the function which allocates new stackspace for
11876 -fsplit-stack. */
11878 static GTY(()) rtx split_stack_fn;
11880 /* A SYMBOL_REF for the more stack function when using the large
11881 model. */
11883 static GTY(()) rtx split_stack_fn_large;
11885 /* Handle -fsplit-stack. These are the first instructions in the
11886 function, even before the regular prologue. */
11888 void
11889 ix86_expand_split_stack_prologue (void)
11891 struct ix86_frame frame;
11892 HOST_WIDE_INT allocate;
11893 unsigned HOST_WIDE_INT args_size;
11894 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11895 rtx scratch_reg = NULL_RTX;
11896 rtx varargs_label = NULL_RTX;
11897 rtx fn;
11899 gcc_assert (flag_split_stack && reload_completed);
11901 ix86_finalize_stack_realign_flags ();
11902 ix86_compute_frame_layout (&frame);
11903 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11905 /* This is the label we will branch to if we have enough stack
11906 space. We expect the basic block reordering pass to reverse this
11907 branch if optimizing, so that we branch in the unlikely case. */
11908 label = gen_label_rtx ();
11910 /* We need to compare the stack pointer minus the frame size with
11911 the stack boundary in the TCB. The stack boundary always gives
11912 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11913 can compare directly. Otherwise we need to do an addition. */
11915 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11916 UNSPEC_STACK_CHECK);
11917 limit = gen_rtx_CONST (Pmode, limit);
11918 limit = gen_rtx_MEM (Pmode, limit);
11919 if (allocate < SPLIT_STACK_AVAILABLE)
11920 current = stack_pointer_rtx;
11921 else
11923 unsigned int scratch_regno;
11924 rtx offset;
11926 /* We need a scratch register to hold the stack pointer minus
11927 the required frame size. Since this is the very start of the
11928 function, the scratch register can be any caller-saved
11929 register which is not used for parameters. */
11930 offset = GEN_INT (- allocate);
11931 scratch_regno = split_stack_prologue_scratch_regno ();
11932 if (scratch_regno == INVALID_REGNUM)
11933 return;
11934 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11935 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11937 /* We don't use ix86_gen_add3 in this case because it will
11938 want to split to lea, but when not optimizing the insn
11939 will not be split after this point. */
11940 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11941 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11942 offset)));
11944 else
11946 emit_move_insn (scratch_reg, offset);
11947 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11948 stack_pointer_rtx));
11950 current = scratch_reg;
11953 ix86_expand_branch (GEU, current, limit, label);
11954 jump_insn = get_last_insn ();
11955 JUMP_LABEL (jump_insn) = label;
11957 /* Mark the jump as very likely to be taken. */
11958 add_int_reg_note (jump_insn, REG_BR_PROB,
11959 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11961 if (split_stack_fn == NULL_RTX)
11962 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11963 fn = split_stack_fn;
11965 /* Get more stack space. We pass in the desired stack space and the
11966 size of the arguments to copy to the new stack. In 32-bit mode
11967 we push the parameters; __morestack will return on a new stack
11968 anyhow. In 64-bit mode we pass the parameters in r10 and
11969 r11. */
11970 allocate_rtx = GEN_INT (allocate);
11971 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11972 call_fusage = NULL_RTX;
11973 if (TARGET_64BIT)
11975 rtx reg10, reg11;
11977 reg10 = gen_rtx_REG (Pmode, R10_REG);
11978 reg11 = gen_rtx_REG (Pmode, R11_REG);
11980 /* If this function uses a static chain, it will be in %r10.
11981 Preserve it across the call to __morestack. */
11982 if (DECL_STATIC_CHAIN (cfun->decl))
11984 rtx rax;
11986 rax = gen_rtx_REG (word_mode, AX_REG);
11987 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11988 use_reg (&call_fusage, rax);
11991 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11992 && !TARGET_PECOFF)
11994 HOST_WIDE_INT argval;
11996 gcc_assert (Pmode == DImode);
11997 /* When using the large model we need to load the address
11998 into a register, and we've run out of registers. So we
11999 switch to a different calling convention, and we call a
12000 different function: __morestack_large. We pass the
12001 argument size in the upper 32 bits of r10 and pass the
12002 frame size in the lower 32 bits. */
12003 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12004 gcc_assert ((args_size & 0xffffffff) == args_size);
12006 if (split_stack_fn_large == NULL_RTX)
12007 split_stack_fn_large =
12008 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12010 if (ix86_cmodel == CM_LARGE_PIC)
12012 rtx label, x;
12014 label = gen_label_rtx ();
12015 emit_label (label);
12016 LABEL_PRESERVE_P (label) = 1;
12017 emit_insn (gen_set_rip_rex64 (reg10, label));
12018 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12019 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12020 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12021 UNSPEC_GOT);
12022 x = gen_rtx_CONST (Pmode, x);
12023 emit_move_insn (reg11, x);
12024 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12025 x = gen_const_mem (Pmode, x);
12026 emit_move_insn (reg11, x);
12028 else
12029 emit_move_insn (reg11, split_stack_fn_large);
12031 fn = reg11;
12033 argval = ((args_size << 16) << 16) + allocate;
12034 emit_move_insn (reg10, GEN_INT (argval));
12036 else
12038 emit_move_insn (reg10, allocate_rtx);
12039 emit_move_insn (reg11, GEN_INT (args_size));
12040 use_reg (&call_fusage, reg11);
12043 use_reg (&call_fusage, reg10);
12045 else
12047 emit_insn (gen_push (GEN_INT (args_size)));
12048 emit_insn (gen_push (allocate_rtx));
12050 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12051 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12052 NULL_RTX, false);
12053 add_function_usage_to (call_insn, call_fusage);
12055 /* In order to make call/return prediction work right, we now need
12056 to execute a return instruction. See
12057 libgcc/config/i386/morestack.S for the details on how this works.
12059 For flow purposes gcc must not see this as a return
12060 instruction--we need control flow to continue at the subsequent
12061 label. Therefore, we use an unspec. */
12062 gcc_assert (crtl->args.pops_args < 65536);
12063 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12065 /* If we are in 64-bit mode and this function uses a static chain,
12066 we saved %r10 in %rax before calling __morestack. */
12067 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12068 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12069 gen_rtx_REG (word_mode, AX_REG));
12071 /* If this function calls va_start, we need to store a pointer to
12072 the arguments on the old stack, because they may not have been
12073 all copied to the new stack. At this point the old stack can be
12074 found at the frame pointer value used by __morestack, because
12075 __morestack has set that up before calling back to us. Here we
12076 store that pointer in a scratch register, and in
12077 ix86_expand_prologue we store the scratch register in a stack
12078 slot. */
12079 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12081 unsigned int scratch_regno;
12082 rtx frame_reg;
12083 int words;
12085 scratch_regno = split_stack_prologue_scratch_regno ();
12086 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12087 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12089 /* 64-bit:
12090 fp -> old fp value
12091 return address within this function
12092 return address of caller of this function
12093 stack arguments
12094 So we add three words to get to the stack arguments.
12096 32-bit:
12097 fp -> old fp value
12098 return address within this function
12099 first argument to __morestack
12100 second argument to __morestack
12101 return address of caller of this function
12102 stack arguments
12103 So we add five words to get to the stack arguments.
12105 words = TARGET_64BIT ? 3 : 5;
12106 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12107 gen_rtx_PLUS (Pmode, frame_reg,
12108 GEN_INT (words * UNITS_PER_WORD))));
12110 varargs_label = gen_label_rtx ();
12111 emit_jump_insn (gen_jump (varargs_label));
12112 JUMP_LABEL (get_last_insn ()) = varargs_label;
12114 emit_barrier ();
12117 emit_label (label);
12118 LABEL_NUSES (label) = 1;
12120 /* If this function calls va_start, we now have to set the scratch
12121 register for the case where we do not call __morestack. In this
12122 case we need to set it based on the stack pointer. */
12123 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12125 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12126 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12127 GEN_INT (UNITS_PER_WORD))));
12129 emit_label (varargs_label);
12130 LABEL_NUSES (varargs_label) = 1;
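/* A hedged sketch of the -fsplit-stack prologue built above for a small
   64-bit frame (small code model, no static chain).  The TCB slot read
   through the UNSPEC_STACK_CHECK address is typically %fs:0x70 on
   GNU/Linux x86-64; the frame and argument sizes below, and the label
   name, are placeholders:
       cmpq    %fs:0x70, %rsp     # enough room below the guard?
       jae     .Lenough
       movq    $1024, %r10        # frame size we want to allocate
       movq    $0, %r11           # bytes of incoming arguments to copy
       callq   __morestack
       retq                       # see libgcc/config/i386/morestack.S
   .Lenough:
       ...                        # regular prologue continues here
*/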
12134 /* We may have to tell the dataflow pass that the split stack prologue
12135 is initializing a scratch register. */
12137 static void
12138 ix86_live_on_entry (bitmap regs)
12140 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12142 gcc_assert (flag_split_stack);
12143 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12147 /* Extract the parts of an RTL expression that is a valid memory address
12148 for an instruction. Return 0 if the structure of the address is
12149 grossly off. Return -1 if the address contains ASHIFT, so it is not
12150 strictly valid, but still used for computing the length of the lea instruction. */
12152 int
12153 ix86_decompose_address (rtx addr, struct ix86_address *out)
12155 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12156 rtx base_reg, index_reg;
12157 HOST_WIDE_INT scale = 1;
12158 rtx scale_rtx = NULL_RTX;
12159 rtx tmp;
12160 int retval = 1;
12161 enum ix86_address_seg seg = SEG_DEFAULT;
12163 /* Allow zero-extended SImode addresses,
12164 they will be emitted with addr32 prefix. */
12165 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12167 if (GET_CODE (addr) == ZERO_EXTEND
12168 && GET_MODE (XEXP (addr, 0)) == SImode)
12170 addr = XEXP (addr, 0);
12171 if (CONST_INT_P (addr))
12172 return 0;
12174 else if (GET_CODE (addr) == AND
12175 && const_32bit_mask (XEXP (addr, 1), DImode))
12177 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12178 if (addr == NULL_RTX)
12179 return 0;
12181 if (CONST_INT_P (addr))
12182 return 0;
12186 /* Allow SImode subregs of DImode addresses,
12187 they will be emitted with addr32 prefix. */
12188 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12190 if (GET_CODE (addr) == SUBREG
12191 && GET_MODE (SUBREG_REG (addr)) == DImode)
12193 addr = SUBREG_REG (addr);
12194 if (CONST_INT_P (addr))
12195 return 0;
12199 if (REG_P (addr))
12200 base = addr;
12201 else if (GET_CODE (addr) == SUBREG)
12203 if (REG_P (SUBREG_REG (addr)))
12204 base = addr;
12205 else
12206 return 0;
12208 else if (GET_CODE (addr) == PLUS)
12210 rtx addends[4], op;
12211 int n = 0, i;
12213 op = addr;
12216 if (n >= 4)
12217 return 0;
12218 addends[n++] = XEXP (op, 1);
12219 op = XEXP (op, 0);
12221 while (GET_CODE (op) == PLUS);
12222 if (n >= 4)
12223 return 0;
12224 addends[n] = op;
12226 for (i = n; i >= 0; --i)
12228 op = addends[i];
12229 switch (GET_CODE (op))
12231 case MULT:
12232 if (index)
12233 return 0;
12234 index = XEXP (op, 0);
12235 scale_rtx = XEXP (op, 1);
12236 break;
12238 case ASHIFT:
12239 if (index)
12240 return 0;
12241 index = XEXP (op, 0);
12242 tmp = XEXP (op, 1);
12243 if (!CONST_INT_P (tmp))
12244 return 0;
12245 scale = INTVAL (tmp);
12246 if ((unsigned HOST_WIDE_INT) scale > 3)
12247 return 0;
12248 scale = 1 << scale;
12249 break;
12251 case ZERO_EXTEND:
12252 op = XEXP (op, 0);
12253 if (GET_CODE (op) != UNSPEC)
12254 return 0;
12255 /* FALLTHRU */
12257 case UNSPEC:
12258 if (XINT (op, 1) == UNSPEC_TP
12259 && TARGET_TLS_DIRECT_SEG_REFS
12260 && seg == SEG_DEFAULT)
12261 seg = DEFAULT_TLS_SEG_REG;
12262 else
12263 return 0;
12264 break;
12266 case SUBREG:
12267 if (!REG_P (SUBREG_REG (op)))
12268 return 0;
12269 /* FALLTHRU */
12271 case REG:
12272 if (!base)
12273 base = op;
12274 else if (!index)
12275 index = op;
12276 else
12277 return 0;
12278 break;
12280 case CONST:
12281 case CONST_INT:
12282 case SYMBOL_REF:
12283 case LABEL_REF:
12284 if (disp)
12285 return 0;
12286 disp = op;
12287 break;
12289 default:
12290 return 0;
12294 else if (GET_CODE (addr) == MULT)
12296 index = XEXP (addr, 0); /* index*scale */
12297 scale_rtx = XEXP (addr, 1);
12299 else if (GET_CODE (addr) == ASHIFT)
12301 /* We're called for lea too, which implements ashift on occasion. */
12302 index = XEXP (addr, 0);
12303 tmp = XEXP (addr, 1);
12304 if (!CONST_INT_P (tmp))
12305 return 0;
12306 scale = INTVAL (tmp);
12307 if ((unsigned HOST_WIDE_INT) scale > 3)
12308 return 0;
12309 scale = 1 << scale;
12310 retval = -1;
12312 else
12313 disp = addr; /* displacement */
12315 if (index)
12317 if (REG_P (index))
12319 else if (GET_CODE (index) == SUBREG
12320 && REG_P (SUBREG_REG (index)))
12322 else
12323 return 0;
12326 /* Extract the integral value of scale. */
12327 if (scale_rtx)
12329 if (!CONST_INT_P (scale_rtx))
12330 return 0;
12331 scale = INTVAL (scale_rtx);
12334 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12335 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12337 /* Avoid useless 0 displacement. */
12338 if (disp == const0_rtx && (base || index))
12339 disp = NULL_RTX;
12341 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12342 if (base_reg && index_reg && scale == 1
12343 && (index_reg == arg_pointer_rtx
12344 || index_reg == frame_pointer_rtx
12345 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12347 rtx tmp;
12348 tmp = base, base = index, index = tmp;
12349 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12352 /* Special case: %ebp cannot be encoded as a base without a displacement.
12353 Similarly %r13. */
12354 if (!disp
12355 && base_reg
12356 && (base_reg == hard_frame_pointer_rtx
12357 || base_reg == frame_pointer_rtx
12358 || base_reg == arg_pointer_rtx
12359 || (REG_P (base_reg)
12360 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12361 || REGNO (base_reg) == R13_REG))))
12362 disp = const0_rtx;
12364 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12365 Avoid this by transforming it to [%esi+0].
12366 Reload calls address legitimization without cfun defined, so we need
12367 to test cfun for being non-NULL. */
12368 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12369 && base_reg && !index_reg && !disp
12370 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12371 disp = const0_rtx;
12373 /* Special case: encode reg+reg instead of reg*2. */
12374 if (!base && index && scale == 2)
12375 base = index, base_reg = index_reg, scale = 1;
12377 /* Special case: scaling cannot be encoded without base or displacement. */
12378 if (!base && !disp && index && scale != 1)
12379 disp = const0_rtx;
12381 out->base = base;
12382 out->index = index;
12383 out->disp = disp;
12384 out->scale = scale;
12385 out->seg = seg;
12387 return retval;
12390 /* Return the cost of the memory address X.
12391 For i386, it is better to use a complex address than let gcc copy
12392 the address into a reg and make a new pseudo. But not if the address
12393 requires two regs - that would mean more pseudos with longer
12394 lifetimes. */
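/* As an illustration, an address combining two distinct pseudo registers,
   e.g. (plus (reg pseudo1) (reg pseudo2)), is charged a cost of 3 below,
   while a single hard base register costs just 1.  */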
12395 static int
12396 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12397 addr_space_t as ATTRIBUTE_UNUSED,
12398 bool speed ATTRIBUTE_UNUSED)
12400 struct ix86_address parts;
12401 int cost = 1;
12402 int ok = ix86_decompose_address (x, &parts);
12404 gcc_assert (ok);
12406 if (parts.base && GET_CODE (parts.base) == SUBREG)
12407 parts.base = SUBREG_REG (parts.base);
12408 if (parts.index && GET_CODE (parts.index) == SUBREG)
12409 parts.index = SUBREG_REG (parts.index);
12411 /* Attempt to minimize number of registers in the address. */
12412 if ((parts.base
12413 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12414 || (parts.index
12415 && (!REG_P (parts.index)
12416 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12417 cost++;
12419 if (parts.base
12420 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12421 && parts.index
12422 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12423 && parts.base != parts.index)
12424 cost++;
12426 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12427 since its predecode logic can't detect the length of instructions
12428 and the instruction degenerates to vector decoding. Increase the cost
12429 of such addresses here. The penalty is at least 2 cycles. It may be
12430 worthwhile to split such addresses or even refuse them altogether.
12432 The following addressing modes are affected:
12433 [base+scale*index]
12434 [scale*index+disp]
12435 [base+index]
12437 The first and last cases may be avoidable by explicitly coding the zero
12438 in the memory address, but I don't have an AMD-K6 machine handy to check
12439 this theory. */
12441 if (TARGET_K6
12442 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12443 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12444 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12445 cost += 10;
12447 return cost;
12450 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
12451 this is used to form addresses to local data when -fPIC is in
12452 use. */
12454 static bool
12455 darwin_local_data_pic (rtx disp)
12457 return (GET_CODE (disp) == UNSPEC
12458 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12461 /* Determine if a given RTX is a valid constant. We already know this
12462 satisfies CONSTANT_P. */
12464 static bool
12465 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12467 switch (GET_CODE (x))
12469 case CONST:
12470 x = XEXP (x, 0);
12472 if (GET_CODE (x) == PLUS)
12474 if (!CONST_INT_P (XEXP (x, 1)))
12475 return false;
12476 x = XEXP (x, 0);
12479 if (TARGET_MACHO && darwin_local_data_pic (x))
12480 return true;
12482 /* Only some unspecs are valid as "constants". */
12483 if (GET_CODE (x) == UNSPEC)
12484 switch (XINT (x, 1))
12486 case UNSPEC_GOT:
12487 case UNSPEC_GOTOFF:
12488 case UNSPEC_PLTOFF:
12489 return TARGET_64BIT;
12490 case UNSPEC_TPOFF:
12491 case UNSPEC_NTPOFF:
12492 x = XVECEXP (x, 0, 0);
12493 return (GET_CODE (x) == SYMBOL_REF
12494 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12495 case UNSPEC_DTPOFF:
12496 x = XVECEXP (x, 0, 0);
12497 return (GET_CODE (x) == SYMBOL_REF
12498 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12499 default:
12500 return false;
12503 /* We must have drilled down to a symbol. */
12504 if (GET_CODE (x) == LABEL_REF)
12505 return true;
12506 if (GET_CODE (x) != SYMBOL_REF)
12507 return false;
12508 /* FALLTHRU */
12510 case SYMBOL_REF:
12511 /* TLS symbols are never valid. */
12512 if (SYMBOL_REF_TLS_MODEL (x))
12513 return false;
12515 /* DLLIMPORT symbols are never valid. */
12516 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12517 && SYMBOL_REF_DLLIMPORT_P (x))
12518 return false;
12520 #if TARGET_MACHO
12521 /* mdynamic-no-pic */
12522 if (MACHO_DYNAMIC_NO_PIC_P)
12523 return machopic_symbol_defined_p (x);
12524 #endif
12525 break;
12527 case CONST_DOUBLE:
12528 if (GET_MODE (x) == TImode
12529 && x != CONST0_RTX (TImode)
12530 && !TARGET_64BIT)
12531 return false;
12532 break;
12534 case CONST_VECTOR:
12535 if (!standard_sse_constant_p (x))
12536 return false;
12538 default:
12539 break;
12542 /* Otherwise we handle everything else in the move patterns. */
12543 return true;
12546 /* Determine if it's legal to put X into the constant pool. This
12547 is not possible for the address of thread-local symbols, which
12548 is checked above. */
12550 static bool
12551 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12553 /* We can always put integral constants and vectors in memory. */
12554 switch (GET_CODE (x))
12556 case CONST_INT:
12557 case CONST_DOUBLE:
12558 case CONST_VECTOR:
12559 return false;
12561 default:
12562 break;
12564 return !ix86_legitimate_constant_p (mode, x);
12567 /* Return true if the symbol is marked as dllimport or as a stub
12568 variable, otherwise false. */
12570 static bool
12571 is_imported_p (rtx x)
12573 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12574 || GET_CODE (x) != SYMBOL_REF)
12575 return false;
12577 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12581 /* Nonzero if the constant value X is a legitimate general operand
12582 when generating PIC code. It is given that flag_pic is on and
12583 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12585 bool
12586 legitimate_pic_operand_p (rtx x)
12588 rtx inner;
12590 switch (GET_CODE (x))
12592 case CONST:
12593 inner = XEXP (x, 0);
12594 if (GET_CODE (inner) == PLUS
12595 && CONST_INT_P (XEXP (inner, 1)))
12596 inner = XEXP (inner, 0);
12598 /* Only some unspecs are valid as "constants". */
12599 if (GET_CODE (inner) == UNSPEC)
12600 switch (XINT (inner, 1))
12602 case UNSPEC_GOT:
12603 case UNSPEC_GOTOFF:
12604 case UNSPEC_PLTOFF:
12605 return TARGET_64BIT;
12606 case UNSPEC_TPOFF:
12607 x = XVECEXP (inner, 0, 0);
12608 return (GET_CODE (x) == SYMBOL_REF
12609 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12610 case UNSPEC_MACHOPIC_OFFSET:
12611 return legitimate_pic_address_disp_p (x);
12612 default:
12613 return false;
12615 /* FALLTHRU */
12617 case SYMBOL_REF:
12618 case LABEL_REF:
12619 return legitimate_pic_address_disp_p (x);
12621 default:
12622 return true;
12626 /* Determine if a given CONST RTX is a valid memory displacement
12627 in PIC mode. */
12629 bool
12630 legitimate_pic_address_disp_p (rtx disp)
12632 bool saw_plus;
12634 /* In 64bit mode we can allow direct addresses of symbols and labels
12635 when they are not dynamic symbols. */
12636 if (TARGET_64BIT)
12638 rtx op0 = disp, op1;
12640 switch (GET_CODE (disp))
12642 case LABEL_REF:
12643 return true;
12645 case CONST:
12646 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12647 break;
12648 op0 = XEXP (XEXP (disp, 0), 0);
12649 op1 = XEXP (XEXP (disp, 0), 1);
12650 if (!CONST_INT_P (op1)
12651 || INTVAL (op1) >= 16*1024*1024
12652 || INTVAL (op1) < -16*1024*1024)
12653 break;
12654 if (GET_CODE (op0) == LABEL_REF)
12655 return true;
12656 if (GET_CODE (op0) == CONST
12657 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12658 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12659 return true;
12660 if (GET_CODE (op0) == UNSPEC
12661 && XINT (op0, 1) == UNSPEC_PCREL)
12662 return true;
12663 if (GET_CODE (op0) != SYMBOL_REF)
12664 break;
12665 /* FALLTHRU */
12667 case SYMBOL_REF:
12668 /* TLS references should always be enclosed in UNSPEC.
12669 A dllimported symbol always needs to be resolved. */
12670 if (SYMBOL_REF_TLS_MODEL (op0)
12671 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12672 return false;
12674 if (TARGET_PECOFF)
12676 if (is_imported_p (op0))
12677 return true;
12679 if (SYMBOL_REF_FAR_ADDR_P (op0)
12680 || !SYMBOL_REF_LOCAL_P (op0))
12681 break;
12683 /* Function symbols need to be resolved only for
12684 the large model.
12685 For the small model we don't need to resolve anything
12686 here. */
12687 if ((ix86_cmodel != CM_LARGE_PIC
12688 && SYMBOL_REF_FUNCTION_P (op0))
12689 || ix86_cmodel == CM_SMALL_PIC)
12690 return true;
12691 /* Non-external symbols don't need to be resolved for
12692 the large and medium models. */
12693 if ((ix86_cmodel == CM_LARGE_PIC
12694 || ix86_cmodel == CM_MEDIUM_PIC)
12695 && !SYMBOL_REF_EXTERNAL_P (op0))
12696 return true;
12698 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12699 && SYMBOL_REF_LOCAL_P (op0)
12700 && ix86_cmodel != CM_LARGE_PIC)
12701 return true;
12702 break;
12704 default:
12705 break;
12708 if (GET_CODE (disp) != CONST)
12709 return false;
12710 disp = XEXP (disp, 0);
12712 if (TARGET_64BIT)
12714 /* It is unsafe to allow PLUS expressions; they would exceed the limited
12715 distance allowed for GOT tables. We should not need these anyway. */
12716 if (GET_CODE (disp) != UNSPEC
12717 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12718 && XINT (disp, 1) != UNSPEC_GOTOFF
12719 && XINT (disp, 1) != UNSPEC_PCREL
12720 && XINT (disp, 1) != UNSPEC_PLTOFF))
12721 return false;
12723 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12724 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12725 return false;
12726 return true;
12729 saw_plus = false;
12730 if (GET_CODE (disp) == PLUS)
12732 if (!CONST_INT_P (XEXP (disp, 1)))
12733 return false;
12734 disp = XEXP (disp, 0);
12735 saw_plus = true;
12738 if (TARGET_MACHO && darwin_local_data_pic (disp))
12739 return true;
12741 if (GET_CODE (disp) != UNSPEC)
12742 return false;
12744 switch (XINT (disp, 1))
12746 case UNSPEC_GOT:
12747 if (saw_plus)
12748 return false;
12749 /* We need to check for both symbols and labels because VxWorks loads
12750 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12751 details. */
12752 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12753 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12754 case UNSPEC_GOTOFF:
12755 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12756 While the ABI also specifies a 32bit relocation, we don't produce it
12757 in the small PIC model at all. */
12758 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12759 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12760 && !TARGET_64BIT)
12761 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12762 return false;
12763 case UNSPEC_GOTTPOFF:
12764 case UNSPEC_GOTNTPOFF:
12765 case UNSPEC_INDNTPOFF:
12766 if (saw_plus)
12767 return false;
12768 disp = XVECEXP (disp, 0, 0);
12769 return (GET_CODE (disp) == SYMBOL_REF
12770 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12771 case UNSPEC_NTPOFF:
12772 disp = XVECEXP (disp, 0, 0);
12773 return (GET_CODE (disp) == SYMBOL_REF
12774 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12775 case UNSPEC_DTPOFF:
12776 disp = XVECEXP (disp, 0, 0);
12777 return (GET_CODE (disp) == SYMBOL_REF
12778 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12781 return false;
12784 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Push reloads for
12785 the invalid parts of the address X and return true if the calling
12786 macro should goto WIN, or return false if the address should be
12787 left alone. */
12789 bool
12790 ix86_legitimize_reload_address (rtx x,
12791 enum machine_mode mode ATTRIBUTE_UNUSED,
12792 int opnum, int type,
12793 int ind_levels ATTRIBUTE_UNUSED)
12795 /* Reload can generate:
12797 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12798 (reg:DI 97))
12799 (reg:DI 2 cx))
12801 This RTX is rejected by ix86_legitimate_address_p due to
12802 non-strictness of base register 97. Following this rejection,
12803 reload pushes all three components into separate registers,
12804 creating an invalid memory address RTX.
12806 The following code reloads only the invalid part of the
12807 memory address RTX. */
12809 if (GET_CODE (x) == PLUS
12810 && REG_P (XEXP (x, 1))
12811 && GET_CODE (XEXP (x, 0)) == PLUS
12812 && REG_P (XEXP (XEXP (x, 0), 1)))
12814 rtx base, index;
12815 bool something_reloaded = false;
12817 base = XEXP (XEXP (x, 0), 1);
12818 if (!REG_OK_FOR_BASE_STRICT_P (base))
12820 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12821 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12822 opnum, (enum reload_type) type);
12823 something_reloaded = true;
12826 index = XEXP (x, 1);
12827 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12829 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12830 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12831 opnum, (enum reload_type) type);
12832 something_reloaded = true;
12835 gcc_assert (something_reloaded);
12836 return true;
12839 return false;
12842 /* Determine if OP is a suitable RTX for an address register.
12843 Return the naked register if a register or a register subreg is
12844 found, otherwise return NULL_RTX. */
12846 static rtx
12847 ix86_validate_address_register (rtx op)
12849 enum machine_mode mode = GET_MODE (op);
12851 /* Only SImode or DImode registers can form the address. */
12852 if (mode != SImode && mode != DImode)
12853 return NULL_RTX;
12855 if (REG_P (op))
12856 return op;
12857 else if (GET_CODE (op) == SUBREG)
12859 rtx reg = SUBREG_REG (op);
12861 if (!REG_P (reg))
12862 return NULL_RTX;
12864 mode = GET_MODE (reg);
12866 /* Don't allow SUBREGs that span more than a word. It can
12867 lead to spill failures when the register is one word out
12868 of a two word structure. */
12869 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12870 return NULL_RTX;
12872 /* Allow only SUBREGs of non-eliminable hard registers. */
12873 if (register_no_elim_operand (reg, mode))
12874 return reg;
12877 /* Op is not a register. */
12878 return NULL_RTX;
12881 /* Recognizes RTL expressions that are valid memory addresses for an
12882 instruction. The MODE argument is the machine mode for the MEM
12883 expression that wants to use this address.
12885 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12886 convert common non-canonical forms to canonical form so that they will
12887 be recognized. */
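/* For example, (plus (reg) (mult (reg) (const_int 4))), optionally with
   a constant displacement, is accepted here, while a scale factor of 3
   or a scale without an index register is rejected.  */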
12889 static bool
12890 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12891 rtx addr, bool strict)
12893 struct ix86_address parts;
12894 rtx base, index, disp;
12895 HOST_WIDE_INT scale;
12896 enum ix86_address_seg seg;
12898 if (ix86_decompose_address (addr, &parts) <= 0)
12899 /* Decomposition failed. */
12900 return false;
12902 base = parts.base;
12903 index = parts.index;
12904 disp = parts.disp;
12905 scale = parts.scale;
12906 seg = parts.seg;
12908 /* Validate base register. */
12909 if (base)
12911 rtx reg = ix86_validate_address_register (base);
12913 if (reg == NULL_RTX)
12914 return false;
12916 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12917 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12918 /* Base is not valid. */
12919 return false;
12922 /* Validate index register. */
12923 if (index)
12925 rtx reg = ix86_validate_address_register (index);
12927 if (reg == NULL_RTX)
12928 return false;
12930 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12931 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12932 /* Index is not valid. */
12933 return false;
12936 /* Index and base should have the same mode. */
12937 if (base && index
12938 && GET_MODE (base) != GET_MODE (index))
12939 return false;
12941 /* Address override works only on the (%reg) part of %fs:(%reg). */
12942 if (seg != SEG_DEFAULT
12943 && ((base && GET_MODE (base) != word_mode)
12944 || (index && GET_MODE (index) != word_mode)))
12945 return false;
12947 /* Validate scale factor. */
12948 if (scale != 1)
12950 if (!index)
12951 /* Scale without index. */
12952 return false;
12954 if (scale != 2 && scale != 4 && scale != 8)
12955 /* Scale is not a valid multiplier. */
12956 return false;
12959 /* Validate displacement. */
12960 if (disp)
12962 if (GET_CODE (disp) == CONST
12963 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12964 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12965 switch (XINT (XEXP (disp, 0), 1))
12967 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12968 used. While the ABI also specifies 32bit relocations, we don't
12969 produce them at all and use IP-relative addressing instead. */
12970 case UNSPEC_GOT:
12971 case UNSPEC_GOTOFF:
12972 gcc_assert (flag_pic);
12973 if (!TARGET_64BIT)
12974 goto is_legitimate_pic;
12976 /* 64bit address unspec. */
12977 return false;
12979 case UNSPEC_GOTPCREL:
12980 case UNSPEC_PCREL:
12981 gcc_assert (flag_pic);
12982 goto is_legitimate_pic;
12984 case UNSPEC_GOTTPOFF:
12985 case UNSPEC_GOTNTPOFF:
12986 case UNSPEC_INDNTPOFF:
12987 case UNSPEC_NTPOFF:
12988 case UNSPEC_DTPOFF:
12989 break;
12991 case UNSPEC_STACK_CHECK:
12992 gcc_assert (flag_split_stack);
12993 break;
12995 default:
12996 /* Invalid address unspec. */
12997 return false;
13000 else if (SYMBOLIC_CONST (disp)
13001 && (flag_pic
13002 || (TARGET_MACHO
13003 #if TARGET_MACHO
13004 && MACHOPIC_INDIRECT
13005 && !machopic_operand_p (disp)
13006 #endif
13010 is_legitimate_pic:
13011 if (TARGET_64BIT && (index || base))
13013 /* foo@dtpoff(%rX) is ok. */
13014 if (GET_CODE (disp) != CONST
13015 || GET_CODE (XEXP (disp, 0)) != PLUS
13016 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13017 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13018 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13019 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13020 /* Non-constant pic memory reference. */
13021 return false;
13023 else if ((!TARGET_MACHO || flag_pic)
13024 && ! legitimate_pic_address_disp_p (disp))
13025 /* Displacement is an invalid pic construct. */
13026 return false;
13027 #if TARGET_MACHO
13028 else if (MACHO_DYNAMIC_NO_PIC_P
13029 && !ix86_legitimate_constant_p (Pmode, disp))
13030 /* displacement must be referenced via non_lazy_pointer */
13031 return false;
13032 #endif
13034 /* This code used to verify that a symbolic pic displacement
13035 includes the pic_offset_table_rtx register.
13037 While this is a good idea, unfortunately these constructs may
13038 be created by the "adds using lea" optimization for incorrect
13039 code like:
13041 int a;
13042 int foo(int i)
13044 return *(&a+i);
13047 This code is nonsensical, but results in addressing the
13048 GOT table with a pic_offset_table_rtx base. We can't
13049 just refuse it easily, since it gets matched by the
13050 "addsi3" pattern, which later gets split to lea when the
13051 output register differs from the input. While this
13052 could be handled by a separate addsi pattern for this case
13053 that never results in lea, disabling this test seems to be
13054 the easier and correct fix for the crash. */
13056 else if (GET_CODE (disp) != LABEL_REF
13057 && !CONST_INT_P (disp)
13058 && (GET_CODE (disp) != CONST
13059 || !ix86_legitimate_constant_p (Pmode, disp))
13060 && (GET_CODE (disp) != SYMBOL_REF
13061 || !ix86_legitimate_constant_p (Pmode, disp)))
13062 /* Displacement is not constant. */
13063 return false;
13064 else if (TARGET_64BIT
13065 && !x86_64_immediate_operand (disp, VOIDmode))
13066 /* Displacement is out of range. */
13067 return false;
13068 /* In x32 mode, constant addresses are sign extended to 64bit, so
13069 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13070 else if (TARGET_X32 && !(index || base)
13071 && CONST_INT_P (disp)
13072 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13073 return false;
13076 /* Everything looks valid. */
13077 return true;
13080 /* Determine if a given RTX is a valid constant address. */
13082 bool
13083 constant_address_p (rtx x)
13085 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13088 /* Return a unique alias set for the GOT. */
13090 static alias_set_type
13091 ix86_GOT_alias_set (void)
13093 static alias_set_type set = -1;
13094 if (set == -1)
13095 set = new_alias_set ();
13096 return set;
13099 /* Return a legitimate reference for ORIG (an address) using the
13100 register REG. If REG is 0, a new pseudo is generated.
13102 There are two types of references that must be handled:
13104 1. Global data references must load the address from the GOT, via
13105 the PIC reg. An insn is emitted to do this load, and the reg is
13106 returned.
13108 2. Static data references, constant pool addresses, and code labels
13109 compute the address as an offset from the GOT, whose base is in
13110 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13111 differentiate them from global data objects. The returned
13112 address is the PIC reg + an unspec constant.
13114 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13115 reg also appears in the address. */
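/* As an illustration, with -m32 -fpic a reference to a global variable
   typically becomes a load from its GOT slot,
   (mem (plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOT)))),
   while a local symbol is reached as pic_offset_table_rtx plus an
   UNSPEC_GOTOFF constant.  */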
13117 static rtx
13118 legitimize_pic_address (rtx orig, rtx reg)
13120 rtx addr = orig;
13121 rtx new_rtx = orig;
13123 #if TARGET_MACHO
13124 if (TARGET_MACHO && !TARGET_64BIT)
13126 if (reg == 0)
13127 reg = gen_reg_rtx (Pmode);
13128 /* Use the generic Mach-O PIC machinery. */
13129 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13131 #endif
13133 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13135 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13136 if (tmp)
13137 return tmp;
13140 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13141 new_rtx = addr;
13142 else if (TARGET_64BIT && !TARGET_PECOFF
13143 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13145 rtx tmpreg;
13146 /* This symbol may be referenced via a displacement from the PIC
13147 base address (@GOTOFF). */
13149 if (reload_in_progress)
13150 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13151 if (GET_CODE (addr) == CONST)
13152 addr = XEXP (addr, 0);
13153 if (GET_CODE (addr) == PLUS)
13155 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13156 UNSPEC_GOTOFF);
13157 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13159 else
13160 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13161 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13162 if (!reg)
13163 tmpreg = gen_reg_rtx (Pmode);
13164 else
13165 tmpreg = reg;
13166 emit_move_insn (tmpreg, new_rtx);
13168 if (reg != 0)
13170 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13171 tmpreg, 1, OPTAB_DIRECT);
13172 new_rtx = reg;
13174 else
13175 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13177 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13179 /* This symbol may be referenced via a displacement from the PIC
13180 base address (@GOTOFF). */
13182 if (reload_in_progress)
13183 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13184 if (GET_CODE (addr) == CONST)
13185 addr = XEXP (addr, 0);
13186 if (GET_CODE (addr) == PLUS)
13188 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13189 UNSPEC_GOTOFF);
13190 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13192 else
13193 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13194 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13195 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13197 if (reg != 0)
13199 emit_move_insn (reg, new_rtx);
13200 new_rtx = reg;
13203 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13204 /* We can't use @GOTOFF for text labels on VxWorks;
13205 see gotoff_operand. */
13206 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13208 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13209 if (tmp)
13210 return tmp;
13212 /* For x64 PE-COFF there is no GOT table, so we use the address
13213 directly. */
13214 if (TARGET_64BIT && TARGET_PECOFF)
13216 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13217 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13219 if (reg == 0)
13220 reg = gen_reg_rtx (Pmode);
13221 emit_move_insn (reg, new_rtx);
13222 new_rtx = reg;
13224 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13226 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13227 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13228 new_rtx = gen_const_mem (Pmode, new_rtx);
13229 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13231 if (reg == 0)
13232 reg = gen_reg_rtx (Pmode);
13233 /* Use gen_movsi directly, otherwise the address is loaded
13234 into a register for CSE. We don't want to CSE these addresses;
13235 instead we CSE addresses from the GOT table, so skip this. */
13236 emit_insn (gen_movsi (reg, new_rtx));
13237 new_rtx = reg;
13239 else
13241 /* This symbol must be referenced via a load from the
13242 Global Offset Table (@GOT). */
13244 if (reload_in_progress)
13245 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13246 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13247 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13248 if (TARGET_64BIT)
13249 new_rtx = force_reg (Pmode, new_rtx);
13250 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13251 new_rtx = gen_const_mem (Pmode, new_rtx);
13252 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13254 if (reg == 0)
13255 reg = gen_reg_rtx (Pmode);
13256 emit_move_insn (reg, new_rtx);
13257 new_rtx = reg;
13260 else
13262 if (CONST_INT_P (addr)
13263 && !x86_64_immediate_operand (addr, VOIDmode))
13265 if (reg)
13267 emit_move_insn (reg, addr);
13268 new_rtx = reg;
13270 else
13271 new_rtx = force_reg (Pmode, addr);
13273 else if (GET_CODE (addr) == CONST)
13275 addr = XEXP (addr, 0);
13277 /* We must match stuff we generate before. Assume the only
13278 unspecs that can get here are ours. Not that we could do
13279 anything with them anyway.... */
13280 if (GET_CODE (addr) == UNSPEC
13281 || (GET_CODE (addr) == PLUS
13282 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13283 return orig;
13284 gcc_assert (GET_CODE (addr) == PLUS);
13286 if (GET_CODE (addr) == PLUS)
13288 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13290 /* Check first to see if this is a constant offset from a @GOTOFF
13291 symbol reference. */
13292 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13293 && CONST_INT_P (op1))
13295 if (!TARGET_64BIT)
13297 if (reload_in_progress)
13298 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13299 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13300 UNSPEC_GOTOFF);
13301 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13302 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13303 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13305 if (reg != 0)
13307 emit_move_insn (reg, new_rtx);
13308 new_rtx = reg;
13311 else
13313 if (INTVAL (op1) < -16*1024*1024
13314 || INTVAL (op1) >= 16*1024*1024)
13316 if (!x86_64_immediate_operand (op1, Pmode))
13317 op1 = force_reg (Pmode, op1);
13318 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13322 else
13324 rtx base = legitimize_pic_address (op0, reg);
13325 enum machine_mode mode = GET_MODE (base);
13326 new_rtx
13327 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13329 if (CONST_INT_P (new_rtx))
13331 if (INTVAL (new_rtx) < -16*1024*1024
13332 || INTVAL (new_rtx) >= 16*1024*1024)
13334 if (!x86_64_immediate_operand (new_rtx, mode))
13335 new_rtx = force_reg (mode, new_rtx);
13336 new_rtx
13337 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13339 else
13340 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13342 else
13344 if (GET_CODE (new_rtx) == PLUS
13345 && CONSTANT_P (XEXP (new_rtx, 1)))
13347 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13348 new_rtx = XEXP (new_rtx, 1);
13350 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13355 return new_rtx;
13358 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13360 static rtx
13361 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13363 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13365 if (GET_MODE (tp) != tp_mode)
13367 gcc_assert (GET_MODE (tp) == SImode);
13368 gcc_assert (tp_mode == DImode);
13370 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13373 if (to_reg)
13374 tp = copy_to_mode_reg (tp_mode, tp);
13376 return tp;
13379 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13381 static GTY(()) rtx ix86_tls_symbol;
13383 static rtx
13384 ix86_tls_get_addr (void)
13386 if (!ix86_tls_symbol)
13388 const char *sym
13389 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13390 ? "___tls_get_addr" : "__tls_get_addr");
13392 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13395 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13397 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13398 UNSPEC_PLTOFF);
13399 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13400 gen_rtx_CONST (Pmode, unspec));
13403 return ix86_tls_symbol;
13406 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13408 static GTY(()) rtx ix86_tls_module_base_symbol;
13411 ix86_tls_module_base (void)
13413 if (!ix86_tls_module_base_symbol)
13415 ix86_tls_module_base_symbol
13416 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13418 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13419 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13422 return ix86_tls_module_base_symbol;
13425 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13426 false if we expect this to be used for a memory address and true if
13427 we expect to load the address into a register. */
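/* For example, under the local-exec model on GNU targets the result is
   the thread pointer plus an UNSPEC_NTPOFF constant, which is later
   printed as x@tpoff (64-bit) or x@ntpoff (32-bit) and resolved against
   the thread-local segment.  */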
13429 static rtx
13430 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13432 rtx dest, base, off;
13433 rtx pic = NULL_RTX, tp = NULL_RTX;
13434 enum machine_mode tp_mode = Pmode;
13435 int type;
13437 /* Fall back to the global dynamic model if the toolchain cannot
13438 support local dynamic. */
13439 if (TARGET_SUN_TLS && !TARGET_64BIT
13440 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13441 && model == TLS_MODEL_LOCAL_DYNAMIC)
13442 model = TLS_MODEL_GLOBAL_DYNAMIC;
13444 switch (model)
13446 case TLS_MODEL_GLOBAL_DYNAMIC:
13447 dest = gen_reg_rtx (Pmode);
13449 if (!TARGET_64BIT)
13451 if (flag_pic && !TARGET_PECOFF)
13452 pic = pic_offset_table_rtx;
13453 else
13455 pic = gen_reg_rtx (Pmode);
13456 emit_insn (gen_set_got (pic));
13460 if (TARGET_GNU2_TLS)
13462 if (TARGET_64BIT)
13463 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13464 else
13465 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13467 tp = get_thread_pointer (Pmode, true);
13468 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13470 if (GET_MODE (x) != Pmode)
13471 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13473 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13475 else
13477 rtx caddr = ix86_tls_get_addr ();
13479 if (TARGET_64BIT)
13481 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13482 rtx insns;
13484 start_sequence ();
13485 emit_call_insn
13486 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13487 insns = get_insns ();
13488 end_sequence ();
13490 if (GET_MODE (x) != Pmode)
13491 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13493 RTL_CONST_CALL_P (insns) = 1;
13494 emit_libcall_block (insns, dest, rax, x);
13496 else
13497 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13499 break;
13501 case TLS_MODEL_LOCAL_DYNAMIC:
13502 base = gen_reg_rtx (Pmode);
13504 if (!TARGET_64BIT)
13506 if (flag_pic)
13507 pic = pic_offset_table_rtx;
13508 else
13510 pic = gen_reg_rtx (Pmode);
13511 emit_insn (gen_set_got (pic));
13515 if (TARGET_GNU2_TLS)
13517 rtx tmp = ix86_tls_module_base ();
13519 if (TARGET_64BIT)
13520 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13521 else
13522 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13524 tp = get_thread_pointer (Pmode, true);
13525 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13526 gen_rtx_MINUS (Pmode, tmp, tp));
13528 else
13530 rtx caddr = ix86_tls_get_addr ();
13532 if (TARGET_64BIT)
13534 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13535 rtx insns, eqv;
13537 start_sequence ();
13538 emit_call_insn
13539 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13540 insns = get_insns ();
13541 end_sequence ();
13543 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13544 share the LD_BASE result with other LD model accesses. */
13545 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13546 UNSPEC_TLS_LD_BASE);
13548 RTL_CONST_CALL_P (insns) = 1;
13549 emit_libcall_block (insns, base, rax, eqv);
13551 else
13552 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13555 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13556 off = gen_rtx_CONST (Pmode, off);
13558 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13560 if (TARGET_GNU2_TLS)
13562 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13564 if (GET_MODE (x) != Pmode)
13565 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13567 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13569 break;
13571 case TLS_MODEL_INITIAL_EXEC:
13572 if (TARGET_64BIT)
13574 if (TARGET_SUN_TLS && !TARGET_X32)
13576 /* The Sun linker took the AMD64 TLS spec literally
13577 and can only handle %rax as destination of the
13578 initial executable code sequence. */
13580 dest = gen_reg_rtx (DImode);
13581 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13582 return dest;
13585 /* Generate DImode references to avoid %fs:(%reg32)
13586 problems and the linker IE->LE relaxation bug. */
13587 tp_mode = DImode;
13588 pic = NULL;
13589 type = UNSPEC_GOTNTPOFF;
13591 else if (flag_pic)
13593 if (reload_in_progress)
13594 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13595 pic = pic_offset_table_rtx;
13596 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13598 else if (!TARGET_ANY_GNU_TLS)
13600 pic = gen_reg_rtx (Pmode);
13601 emit_insn (gen_set_got (pic));
13602 type = UNSPEC_GOTTPOFF;
13604 else
13606 pic = NULL;
13607 type = UNSPEC_INDNTPOFF;
13610 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13611 off = gen_rtx_CONST (tp_mode, off);
13612 if (pic)
13613 off = gen_rtx_PLUS (tp_mode, pic, off);
13614 off = gen_const_mem (tp_mode, off);
13615 set_mem_alias_set (off, ix86_GOT_alias_set ());
13617 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13619 base = get_thread_pointer (tp_mode,
13620 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13621 off = force_reg (tp_mode, off);
13622 return gen_rtx_PLUS (tp_mode, base, off);
13624 else
13626 base = get_thread_pointer (Pmode, true);
13627 dest = gen_reg_rtx (Pmode);
13628 emit_insn (ix86_gen_sub3 (dest, base, off));
13630 break;
13632 case TLS_MODEL_LOCAL_EXEC:
13633 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13634 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13635 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13636 off = gen_rtx_CONST (Pmode, off);
13638 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13640 base = get_thread_pointer (Pmode,
13641 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13642 return gen_rtx_PLUS (Pmode, base, off);
13644 else
13646 base = get_thread_pointer (Pmode, true);
13647 dest = gen_reg_rtx (Pmode);
13648 emit_insn (ix86_gen_sub3 (dest, base, off));
13650 break;
13652 default:
13653 gcc_unreachable ();
13656 return dest;
13659 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13660 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13661 unique refptr-DECL symbol corresponding to symbol DECL. */
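/* E.g. for a dllimport declaration of foo this builds a decl whose RTL
   is (mem (symbol_ref "*__imp_foo")) (or "*__imp__foo" when user labels
   carry an underscore prefix), so references are routed through the
   import table entry.  */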
13663 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13664 htab_t dllimport_map;
13666 static tree
13667 get_dllimport_decl (tree decl, bool beimport)
13669 struct tree_map *h, in;
13670 void **loc;
13671 const char *name;
13672 const char *prefix;
13673 size_t namelen, prefixlen;
13674 char *imp_name;
13675 tree to;
13676 rtx rtl;
13678 if (!dllimport_map)
13679 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13681 in.hash = htab_hash_pointer (decl);
13682 in.base.from = decl;
13683 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13684 h = (struct tree_map *) *loc;
13685 if (h)
13686 return h->to;
13688 *loc = h = ggc_alloc_tree_map ();
13689 h->hash = in.hash;
13690 h->base.from = decl;
13691 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13692 VAR_DECL, NULL, ptr_type_node);
13693 DECL_ARTIFICIAL (to) = 1;
13694 DECL_IGNORED_P (to) = 1;
13695 DECL_EXTERNAL (to) = 1;
13696 TREE_READONLY (to) = 1;
13698 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13699 name = targetm.strip_name_encoding (name);
13700 if (beimport)
13701 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13702 ? "*__imp_" : "*__imp__";
13703 else
13704 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13705 namelen = strlen (name);
13706 prefixlen = strlen (prefix);
13707 imp_name = (char *) alloca (namelen + prefixlen + 1);
13708 memcpy (imp_name, prefix, prefixlen);
13709 memcpy (imp_name + prefixlen, name, namelen + 1);
13711 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13712 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13713 SET_SYMBOL_REF_DECL (rtl, to);
13714 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13715 if (!beimport)
13717 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13718 #ifdef SUB_TARGET_RECORD_STUB
13719 SUB_TARGET_RECORD_STUB (name);
13720 #endif
13723 rtl = gen_const_mem (Pmode, rtl);
13724 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13726 SET_DECL_RTL (to, rtl);
13727 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13729 return to;
13732 /* Expand SYMBOL into its corresponding far-address symbol.
13733 WANT_REG is true if we require the result to be a register. */
13735 static rtx
13736 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13738 tree imp_decl;
13739 rtx x;
13741 gcc_assert (SYMBOL_REF_DECL (symbol));
13742 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13744 x = DECL_RTL (imp_decl);
13745 if (want_reg)
13746 x = force_reg (Pmode, x);
13747 return x;
13750 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13751 true if we require the result to be a register. */
13753 static rtx
13754 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13756 tree imp_decl;
13757 rtx x;
13759 gcc_assert (SYMBOL_REF_DECL (symbol));
13760 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13762 x = DECL_RTL (imp_decl);
13763 if (want_reg)
13764 x = force_reg (Pmode, x);
13765 return x;
13768 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13769 is true if we require the result to be a register. */
13771 static rtx
13772 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13774 if (!TARGET_PECOFF)
13775 return NULL_RTX;
13777 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13779 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13780 return legitimize_dllimport_symbol (addr, inreg);
13781 if (GET_CODE (addr) == CONST
13782 && GET_CODE (XEXP (addr, 0)) == PLUS
13783 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13784 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13786 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13787 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13791 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13792 return NULL_RTX;
13793 if (GET_CODE (addr) == SYMBOL_REF
13794 && !is_imported_p (addr)
13795 && SYMBOL_REF_EXTERNAL_P (addr)
13796 && SYMBOL_REF_DECL (addr))
13797 return legitimize_pe_coff_extern_decl (addr, inreg);
13799 if (GET_CODE (addr) == CONST
13800 && GET_CODE (XEXP (addr, 0)) == PLUS
13801 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13802 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13803 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13804 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13806 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13807 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13809 return NULL_RTX;
13812 /* Try machine-dependent ways of modifying an illegitimate address
13813 to be legitimate. If we find one, return the new, valid address.
13814 This macro is used in only one place: `memory_address' in explow.c.
13816 OLDX is the address as it was before break_out_memory_refs was called.
13817 In some cases it is useful to look at this to decide what needs to be done.
13819 It is always safe for this macro to do nothing. It exists to recognize
13820 opportunities to optimize the output.
13822 For the 80386, we handle X+REG by loading X into a register R and
13823 using R+REG. R will go in a general reg and indexing will be used.
13824 However, if REG is a broken-out memory address or multiplication,
13825 nothing needs to be done because REG can certainly go in a general reg.
13827 When -fpic is used, special handling is needed for symbolic references.
13828 See comments by legitimize_pic_address in i386.c for details. */
13830 static rtx
13831 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13832 enum machine_mode mode)
13834 int changed = 0;
13835 unsigned log;
13837 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13838 if (log)
13839 return legitimize_tls_address (x, (enum tls_model) log, false);
13840 if (GET_CODE (x) == CONST
13841 && GET_CODE (XEXP (x, 0)) == PLUS
13842 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13843 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13845 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13846 (enum tls_model) log, false);
13847 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13850 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13852 rtx tmp = legitimize_pe_coff_symbol (x, true);
13853 if (tmp)
13854 return tmp;
13857 if (flag_pic && SYMBOLIC_CONST (x))
13858 return legitimize_pic_address (x, 0);
13860 #if TARGET_MACHO
13861 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13862 return machopic_indirect_data_reference (x, 0);
13863 #endif
13865 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
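/* E.g. (ashift (reg) (const_int 2)) used in an address becomes
   (mult (reg) (const_int 4)), the canonical form that
   ix86_decompose_address expects.  */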
13866 if (GET_CODE (x) == ASHIFT
13867 && CONST_INT_P (XEXP (x, 1))
13868 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13870 changed = 1;
13871 log = INTVAL (XEXP (x, 1));
13872 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13873 GEN_INT (1 << log));
13876 if (GET_CODE (x) == PLUS)
13878 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13880 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13881 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13882 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13884 changed = 1;
13885 log = INTVAL (XEXP (XEXP (x, 0), 1));
13886 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13887 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13888 GEN_INT (1 << log));
13891 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13892 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13893 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13895 changed = 1;
13896 log = INTVAL (XEXP (XEXP (x, 1), 1));
13897 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13898 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13899 GEN_INT (1 << log));
13902 /* Put multiply first if it isn't already. */
13903 if (GET_CODE (XEXP (x, 1)) == MULT)
13905 rtx tmp = XEXP (x, 0);
13906 XEXP (x, 0) = XEXP (x, 1);
13907 XEXP (x, 1) = tmp;
13908 changed = 1;
13911 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13912 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13913 created by virtual register instantiation, register elimination, and
13914 similar optimizations. */
13915 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13917 changed = 1;
13918 x = gen_rtx_PLUS (Pmode,
13919 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13920 XEXP (XEXP (x, 1), 0)),
13921 XEXP (XEXP (x, 1), 1));
13924 /* Canonicalize
13925 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13926 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13927 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13928 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13929 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13930 && CONSTANT_P (XEXP (x, 1)))
13932 rtx constant;
13933 rtx other = NULL_RTX;
13935 if (CONST_INT_P (XEXP (x, 1)))
13937 constant = XEXP (x, 1);
13938 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13940 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13942 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13943 other = XEXP (x, 1);
13945 else
13946 constant = 0;
13948 if (constant)
13950 changed = 1;
13951 x = gen_rtx_PLUS (Pmode,
13952 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13953 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13954 plus_constant (Pmode, other,
13955 INTVAL (constant)));
13959 if (changed && ix86_legitimate_address_p (mode, x, false))
13960 return x;
13962 if (GET_CODE (XEXP (x, 0)) == MULT)
13964 changed = 1;
13965 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13968 if (GET_CODE (XEXP (x, 1)) == MULT)
13970 changed = 1;
13971 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13974 if (changed
13975 && REG_P (XEXP (x, 1))
13976 && REG_P (XEXP (x, 0)))
13977 return x;
13979 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13981 changed = 1;
13982 x = legitimize_pic_address (x, 0);
13985 if (changed && ix86_legitimate_address_p (mode, x, false))
13986 return x;
13988 if (REG_P (XEXP (x, 0)))
13990 rtx temp = gen_reg_rtx (Pmode);
13991 rtx val = force_operand (XEXP (x, 1), temp);
13992 if (val != temp)
13994 val = convert_to_mode (Pmode, val, 1);
13995 emit_move_insn (temp, val);
13998 XEXP (x, 1) = temp;
13999 return x;
14002 else if (REG_P (XEXP (x, 1)))
14004 rtx temp = gen_reg_rtx (Pmode);
14005 rtx val = force_operand (XEXP (x, 0), temp);
14006 if (val != temp)
14008 val = convert_to_mode (Pmode, val, 1);
14009 emit_move_insn (temp, val);
14012 XEXP (x, 0) = temp;
14013 return x;
14017 return x;
14020 /* Print an integer constant expression in assembler syntax. Addition
14021 and subtraction are the only arithmetic that may appear in these
14022 expressions. FILE is the stdio stream to write to, X is the rtx, and
14023 CODE is the operand print code from the output string. */
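/* For instance, (const (unspec [symbol_ref "x"] UNSPEC_GOTOFF)) is
   printed as "x@GOTOFF", and UNSPEC_GOTPCREL is printed as
   "x@GOTPCREL(%rip)" in AT&T syntax.  */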
14025 static void
14026 output_pic_addr_const (FILE *file, rtx x, int code)
14028 char buf[256];
14030 switch (GET_CODE (x))
14032 case PC:
14033 gcc_assert (flag_pic);
14034 putc ('.', file);
14035 break;
14037 case SYMBOL_REF:
14038 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14039 output_addr_const (file, x);
14040 else
14042 const char *name = XSTR (x, 0);
14044 /* Mark the decl as referenced so that cgraph will
14045 output the function. */
14046 if (SYMBOL_REF_DECL (x))
14047 mark_decl_referenced (SYMBOL_REF_DECL (x));
14049 #if TARGET_MACHO
14050 if (MACHOPIC_INDIRECT
14051 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14052 name = machopic_indirection_name (x, /*stub_p=*/true);
14053 #endif
14054 assemble_name (file, name);
14056 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14057 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14058 fputs ("@PLT", file);
14059 break;
14061 case LABEL_REF:
14062 x = XEXP (x, 0);
14063 /* FALLTHRU */
14064 case CODE_LABEL:
14065 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14066 assemble_name (asm_out_file, buf);
14067 break;
14069 case CONST_INT:
14070 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14071 break;
14073 case CONST:
14074 /* This used to output parentheses around the expression,
14075 but that does not work on the 386 (either ATT or BSD assembler). */
14076 output_pic_addr_const (file, XEXP (x, 0), code);
14077 break;
14079 case CONST_DOUBLE:
14080 if (GET_MODE (x) == VOIDmode)
14082 /* We can use %d if the number is <32 bits and positive. */
14083 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14084 fprintf (file, "0x%lx%08lx",
14085 (unsigned long) CONST_DOUBLE_HIGH (x),
14086 (unsigned long) CONST_DOUBLE_LOW (x));
14087 else
14088 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14090 else
14091 /* We can't handle floating point constants;
14092 TARGET_PRINT_OPERAND must handle them. */
14093 output_operand_lossage ("floating constant misused");
14094 break;
14096 case PLUS:
14097 /* Some assemblers need integer constants to appear first. */
14098 if (CONST_INT_P (XEXP (x, 0)))
14100 output_pic_addr_const (file, XEXP (x, 0), code);
14101 putc ('+', file);
14102 output_pic_addr_const (file, XEXP (x, 1), code);
14104 else
14106 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14107 output_pic_addr_const (file, XEXP (x, 1), code);
14108 putc ('+', file);
14109 output_pic_addr_const (file, XEXP (x, 0), code);
14111 break;
14113 case MINUS:
14114 if (!TARGET_MACHO)
14115 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14116 output_pic_addr_const (file, XEXP (x, 0), code);
14117 putc ('-', file);
14118 output_pic_addr_const (file, XEXP (x, 1), code);
14119 if (!TARGET_MACHO)
14120 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14121 break;
14123 case UNSPEC:
14124 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14126 bool f = i386_asm_output_addr_const_extra (file, x);
14127 gcc_assert (f);
14128 break;
14131 gcc_assert (XVECLEN (x, 0) == 1);
14132 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14133 switch (XINT (x, 1))
14135 case UNSPEC_GOT:
14136 fputs ("@GOT", file);
14137 break;
14138 case UNSPEC_GOTOFF:
14139 fputs ("@GOTOFF", file);
14140 break;
14141 case UNSPEC_PLTOFF:
14142 fputs ("@PLTOFF", file);
14143 break;
14144 case UNSPEC_PCREL:
14145 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14146 "(%rip)" : "[rip]", file);
14147 break;
14148 case UNSPEC_GOTPCREL:
14149 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14150 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14151 break;
14152 case UNSPEC_GOTTPOFF:
14153 /* FIXME: This might be @TPOFF in Sun ld too. */
14154 fputs ("@gottpoff", file);
14155 break;
14156 case UNSPEC_TPOFF:
14157 fputs ("@tpoff", file);
14158 break;
14159 case UNSPEC_NTPOFF:
14160 if (TARGET_64BIT)
14161 fputs ("@tpoff", file);
14162 else
14163 fputs ("@ntpoff", file);
14164 break;
14165 case UNSPEC_DTPOFF:
14166 fputs ("@dtpoff", file);
14167 break;
14168 case UNSPEC_GOTNTPOFF:
14169 if (TARGET_64BIT)
14170 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14171 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14172 else
14173 fputs ("@gotntpoff", file);
14174 break;
14175 case UNSPEC_INDNTPOFF:
14176 fputs ("@indntpoff", file);
14177 break;
14178 #if TARGET_MACHO
14179 case UNSPEC_MACHOPIC_OFFSET:
14180 putc ('-', file);
14181 machopic_output_function_base_name (file);
14182 break;
14183 #endif
14184 default:
14185 output_operand_lossage ("invalid UNSPEC as operand");
14186 break;
14188 break;
14190 default:
14191 output_operand_lossage ("invalid expression as operand");
14195 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14196 We need to emit DTP-relative relocations. */
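/* E.g. for SIZE == 4 this emits something like
   .long x@dtpoff
   and for SIZE == 8 the same with ", 0" appended for the upper half
   (assuming ASM_LONG expands to the .long directive).  */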
14198 static void ATTRIBUTE_UNUSED
14199 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14201 fputs (ASM_LONG, file);
14202 output_addr_const (file, x);
14203 fputs ("@dtpoff", file);
14204 switch (size)
14206 case 4:
14207 break;
14208 case 8:
14209 fputs (", 0", file);
14210 break;
14211 default:
14212 gcc_unreachable ();
14216 /* Return true if X is a representation of the PIC register. This copes
14217 with calls from ix86_find_base_term, where the register might have
14218 been replaced by a cselib value. */
14220 static bool
14221 ix86_pic_register_p (rtx x)
14223 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14224 return (pic_offset_table_rtx
14225 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14226 else
14227 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14230 /* Helper function for ix86_delegitimize_address.
14231 Attempt to delegitimize TLS local-exec accesses. */
14233 static rtx
14234 ix86_delegitimize_tls_address (rtx orig_x)
14236 rtx x = orig_x, unspec;
14237 struct ix86_address addr;
14239 if (!TARGET_TLS_DIRECT_SEG_REFS)
14240 return orig_x;
14241 if (MEM_P (x))
14242 x = XEXP (x, 0);
14243 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14244 return orig_x;
14245 if (ix86_decompose_address (x, &addr) == 0
14246 || addr.seg != DEFAULT_TLS_SEG_REG
14247 || addr.disp == NULL_RTX
14248 || GET_CODE (addr.disp) != CONST)
14249 return orig_x;
14250 unspec = XEXP (addr.disp, 0);
14251 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14252 unspec = XEXP (unspec, 0);
14253 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14254 return orig_x;
14255 x = XVECEXP (unspec, 0, 0);
14256 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14257 if (unspec != XEXP (addr.disp, 0))
14258 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14259 if (addr.index)
14261 rtx idx = addr.index;
14262 if (addr.scale != 1)
14263 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14264 x = gen_rtx_PLUS (Pmode, idx, x);
14266 if (addr.base)
14267 x = gen_rtx_PLUS (Pmode, addr.base, x);
14268 if (MEM_P (orig_x))
14269 x = replace_equiv_address_nv (orig_x, x);
14270 return x;
14273 /* In the name of slightly smaller debug output, and to cater to
14274 general assembler lossage, recognize PIC+GOTOFF and turn it back
14275 into a direct symbol reference.
14277 On Darwin, this is necessary to avoid a crash, because Darwin
14278 has a different PIC label for each routine but the DWARF debugging
14279 information is not associated with any particular routine, so it's
14280 necessary to remove references to the PIC label from RTL stored by
14281 the DWARF output code. */
14283 static rtx
14284 ix86_delegitimize_address (rtx x)
14286 rtx orig_x = delegitimize_mem_from_attrs (x);
14287 /* addend is NULL or some rtx if x is something+GOTOFF where
14288 something doesn't include the PIC register. */
14289 rtx addend = NULL_RTX;
14290 /* reg_addend is NULL or a multiple of some register. */
14291 rtx reg_addend = NULL_RTX;
14292 /* const_addend is NULL or a const_int. */
14293 rtx const_addend = NULL_RTX;
14294 /* This is the result, or NULL. */
14295 rtx result = NULL_RTX;
14297 x = orig_x;
14299 if (MEM_P (x))
14300 x = XEXP (x, 0);
14302 if (TARGET_64BIT)
14304 if (GET_CODE (x) == CONST
14305 && GET_CODE (XEXP (x, 0)) == PLUS
14306 && GET_MODE (XEXP (x, 0)) == Pmode
14307 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14308 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14309 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14311 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14312 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14313 if (MEM_P (orig_x))
14314 x = replace_equiv_address_nv (orig_x, x);
14315 return x;
14318 if (GET_CODE (x) == CONST
14319 && GET_CODE (XEXP (x, 0)) == UNSPEC
14320 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14321 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14322 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14324 x = XVECEXP (XEXP (x, 0), 0, 0);
14325 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14327 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14328 GET_MODE (x), 0);
14329 if (x == NULL_RTX)
14330 return orig_x;
14332 return x;
14335 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14336 return ix86_delegitimize_tls_address (orig_x);
14338 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14339 and -mcmodel=medium -fpic. */
14342 if (GET_CODE (x) != PLUS
14343 || GET_CODE (XEXP (x, 1)) != CONST)
14344 return ix86_delegitimize_tls_address (orig_x);
14346 if (ix86_pic_register_p (XEXP (x, 0)))
14347 /* %ebx + GOT/GOTOFF */
14349 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14351 /* %ebx + %reg * scale + GOT/GOTOFF */
14352 reg_addend = XEXP (x, 0);
14353 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14354 reg_addend = XEXP (reg_addend, 1);
14355 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14356 reg_addend = XEXP (reg_addend, 0);
14357 else
14359 reg_addend = NULL_RTX;
14360 addend = XEXP (x, 0);
14363 else
14364 addend = XEXP (x, 0);
14366 x = XEXP (XEXP (x, 1), 0);
14367 if (GET_CODE (x) == PLUS
14368 && CONST_INT_P (XEXP (x, 1)))
14370 const_addend = XEXP (x, 1);
14371 x = XEXP (x, 0);
14374 if (GET_CODE (x) == UNSPEC
14375 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14376 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14377 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14378 && !MEM_P (orig_x) && !addend)))
14379 result = XVECEXP (x, 0, 0);
14381 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14382 && !MEM_P (orig_x))
14383 result = XVECEXP (x, 0, 0);
14385 if (! result)
14386 return ix86_delegitimize_tls_address (orig_x);
14388 if (const_addend)
14389 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14390 if (reg_addend)
14391 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14392 if (addend)
14394 /* If the rest of original X doesn't involve the PIC register, add
14395 addend and subtract pic_offset_table_rtx. This can happen e.g.
14396 for code like:
14397 leal (%ebx, %ecx, 4), %ecx
14399 movl foo@GOTOFF(%ecx), %edx
14400 in which case we return (%ecx - %ebx) + foo. */
14401 if (pic_offset_table_rtx)
14402 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14403 pic_offset_table_rtx),
14404 result);
14405 else
14406 return orig_x;
14408 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14410 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14411 if (result == NULL_RTX)
14412 return orig_x;
14414 return result;
14417 /* If X is a machine specific address (i.e. a symbol or label being
14418 referenced as a displacement from the GOT implemented using an
14419 UNSPEC), then return the base term. Otherwise return X. */
14422 ix86_find_base_term (rtx x)
14424 rtx term;
14426 if (TARGET_64BIT)
14428 if (GET_CODE (x) != CONST)
14429 return x;
14430 term = XEXP (x, 0);
14431 if (GET_CODE (term) == PLUS
14432 && (CONST_INT_P (XEXP (term, 1))
14433 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14434 term = XEXP (term, 0);
14435 if (GET_CODE (term) != UNSPEC
14436 || (XINT (term, 1) != UNSPEC_GOTPCREL
14437 && XINT (term, 1) != UNSPEC_PCREL))
14438 return x;
14440 return XVECEXP (term, 0, 0);
14443 return ix86_delegitimize_address (x);
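/* Output to FILE the one- or two-letter suffix ("e", "ne", "a", "b",
   "ge", ...) that encodes comparison CODE in flags mode MODE.  If
   REVERSE, the reversed condition is printed; FP selects the
   spellings required by fcmov and floating-point conditions.  */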
14446 static void
14447 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14448 bool fp, FILE *file)
14450 const char *suffix;
14452 if (mode == CCFPmode || mode == CCFPUmode)
14454 code = ix86_fp_compare_code_to_integer (code);
14455 mode = CCmode;
14457 if (reverse)
14458 code = reverse_condition (code);
14460 switch (code)
14462 case EQ:
14463 switch (mode)
14465 case CCAmode:
14466 suffix = "a";
14467 break;
14469 case CCCmode:
14470 suffix = "c";
14471 break;
14473 case CCOmode:
14474 suffix = "o";
14475 break;
14477 case CCSmode:
14478 suffix = "s";
14479 break;
14481 default:
14482 suffix = "e";
14484 break;
14485 case NE:
14486 switch (mode)
14488 case CCAmode:
14489 suffix = "na";
14490 break;
14492 case CCCmode:
14493 suffix = "nc";
14494 break;
14496 case CCOmode:
14497 suffix = "no";
14498 break;
14500 case CCSmode:
14501 suffix = "ns";
14502 break;
14504 default:
14505 suffix = "ne";
14507 break;
14508 case GT:
14509 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14510 suffix = "g";
14511 break;
14512 case GTU:
14513 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14514 Those same assemblers have the same but opposite lossage on cmov. */
14515 if (mode == CCmode)
14516 suffix = fp ? "nbe" : "a";
14517 else
14518 gcc_unreachable ();
14519 break;
14520 case LT:
14521 switch (mode)
14523 case CCNOmode:
14524 case CCGOCmode:
14525 suffix = "s";
14526 break;
14528 case CCmode:
14529 case CCGCmode:
14530 suffix = "l";
14531 break;
14533 default:
14534 gcc_unreachable ();
14536 break;
14537 case LTU:
14538 if (mode == CCmode)
14539 suffix = "b";
14540 else if (mode == CCCmode)
14541 suffix = fp ? "b" : "c";
14542 else
14543 gcc_unreachable ();
14544 break;
14545 case GE:
14546 switch (mode)
14548 case CCNOmode:
14549 case CCGOCmode:
14550 suffix = "ns";
14551 break;
14553 case CCmode:
14554 case CCGCmode:
14555 suffix = "ge";
14556 break;
14558 default:
14559 gcc_unreachable ();
14561 break;
14562 case GEU:
14563 if (mode == CCmode)
14564 suffix = "nb";
14565 else if (mode == CCCmode)
14566 suffix = fp ? "nb" : "nc";
14567 else
14568 gcc_unreachable ();
14569 break;
14570 case LE:
14571 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14572 suffix = "le";
14573 break;
14574 case LEU:
14575 if (mode == CCmode)
14576 suffix = "be";
14577 else
14578 gcc_unreachable ();
14579 break;
14580 case UNORDERED:
14581 suffix = fp ? "u" : "p";
14582 break;
14583 case ORDERED:
14584 suffix = fp ? "nu" : "np";
14585 break;
14586 default:
14587 gcc_unreachable ();
14589 fputs (suffix, file);
14592 /* Print the name of register X to FILE based on its machine mode and number.
14593 If CODE is 'w', pretend the mode is HImode.
14594 If CODE is 'b', pretend the mode is QImode.
14595 If CODE is 'k', pretend the mode is SImode.
14596 If CODE is 'q', pretend the mode is DImode.
14597 If CODE is 'x', pretend the mode is V4SFmode.
14598 If CODE is 't', pretend the mode is V8SFmode.
14599 If CODE is 'g', pretend the mode is V16SFmode.
14600 If CODE is 'h', pretend the reg is the 'high' byte register.
14601 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack register.
14602 If CODE is 'd', duplicate the operand for an AVX instruction.
14605 void
14606 print_reg (rtx x, int code, FILE *file)
14608 const char *reg;
14609 unsigned int regno;
14610 bool duplicated = code == 'd' && TARGET_AVX;
14612 if (ASSEMBLER_DIALECT == ASM_ATT)
14613 putc ('%', file);
14615 if (x == pc_rtx)
14617 gcc_assert (TARGET_64BIT);
14618 fputs ("rip", file);
14619 return;
14622 regno = true_regnum (x);
14623 gcc_assert (regno != ARG_POINTER_REGNUM
14624 && regno != FRAME_POINTER_REGNUM
14625 && regno != FLAGS_REG
14626 && regno != FPSR_REG
14627 && regno != FPCR_REG);
14629 if (code == 'w' || MMX_REG_P (x))
14630 code = 2;
14631 else if (code == 'b')
14632 code = 1;
14633 else if (code == 'k')
14634 code = 4;
14635 else if (code == 'q')
14636 code = 8;
14637 else if (code == 'y')
14638 code = 3;
14639 else if (code == 'h')
14640 code = 0;
14641 else if (code == 'x')
14642 code = 16;
14643 else if (code == 't')
14644 code = 32;
14645 else if (code == 'g')
14646 code = 64;
14647 else
14648 code = GET_MODE_SIZE (GET_MODE (x));
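/* From this point on CODE is the print width in bytes, with the
   special values 0 for the high QImode half ('h') and 3 for the x87
   "st" form ('y').  */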
14650 /* Irritatingly, AMD extended registers use a different naming convention
14651 from the normal registers: "r%d[bwd]". */
14652 if (REX_INT_REGNO_P (regno))
14654 gcc_assert (TARGET_64BIT);
14655 putc ('r', file);
14656 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14657 switch (code)
14659 case 0:
14660 error ("extended registers have no high halves");
14661 break;
14662 case 1:
14663 putc ('b', file);
14664 break;
14665 case 2:
14666 putc ('w', file);
14667 break;
14668 case 4:
14669 putc ('d', file);
14670 break;
14671 case 8:
14672 /* no suffix */
14673 break;
14674 default:
14675 error ("unsupported operand size for extended register");
14676 break;
14678 return;
14681 reg = NULL;
14682 switch (code)
14684 case 3:
14685 if (STACK_TOP_P (x))
14687 reg = "st(0)";
14688 break;
14690 /* FALLTHRU */
14691 case 8:
14692 case 4:
14693 case 12:
14694 if (! ANY_FP_REG_P (x))
14695 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14696 /* FALLTHRU */
14697 case 16:
14698 case 2:
14699 normal:
14700 reg = hi_reg_name[regno];
14701 break;
14702 case 1:
14703 if (regno >= ARRAY_SIZE (qi_reg_name))
14704 goto normal;
14705 reg = qi_reg_name[regno];
14706 break;
14707 case 0:
14708 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14709 goto normal;
14710 reg = qi_high_reg_name[regno];
14711 break;
14712 case 32:
14713 if (SSE_REG_P (x))
14715 gcc_assert (!duplicated);
14716 putc ('y', file);
14717 fputs (hi_reg_name[regno] + 1, file);
14718 return;
14720 case 64:
14721 if (SSE_REG_P (x))
14723 gcc_assert (!duplicated);
14724 putc ('z', file);
14725 fputs (hi_reg_name[REGNO (x)] + 1, file);
14726 return;
14728 break;
14729 default:
14730 gcc_unreachable ();
14733 fputs (reg, file);
14734 if (duplicated)
14736 if (ASSEMBLER_DIALECT == ASM_ATT)
14737 fprintf (file, ", %%%s", reg);
14738 else
14739 fprintf (file, ", %s", reg);
14743 /* Locate some local-dynamic symbol still in use by this function
14744 so that we can print its name in some tls_local_dynamic_base
14745 pattern. */
14747 static int
14748 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14750 rtx x = *px;
14752 if (GET_CODE (x) == SYMBOL_REF
14753 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14755 cfun->machine->some_ld_name = XSTR (x, 0);
14756 return 1;
14759 return 0;
14762 static const char *
14763 get_some_local_dynamic_name (void)
14765 rtx insn;
14767 if (cfun->machine->some_ld_name)
14768 return cfun->machine->some_ld_name;
14770 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14771 if (NONDEBUG_INSN_P (insn)
14772 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14773 return cfun->machine->some_ld_name;
14775 return NULL;
14778 /* Meaning of CODE:
14779 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14780 C -- print opcode suffix for set/cmov insn.
14781 c -- like C, but print reversed condition
14782 F,f -- likewise, but for floating-point.
14783 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14784 otherwise nothing
14785 R -- print embedded rounding and sae.
14786 r -- print only sae.
14787 z -- print the opcode suffix for the size of the current operand.
14788 Z -- likewise, with special suffixes for x87 instructions.
14789 * -- print a star (in certain assembler syntax)
14790 A -- print an absolute memory reference.
14791 E -- print address with DImode register names if TARGET_64BIT.
14792 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14793 s -- print a shift double count, followed by the assembler's argument
14794 delimiter.
14795 b -- print the QImode name of the register for the indicated operand.
14796 %b0 would print %al if operands[0] is reg 0.
14797 w -- likewise, print the HImode name of the register.
14798 k -- likewise, print the SImode name of the register.
14799 q -- likewise, print the DImode name of the register.
14800 x -- likewise, print the V4SFmode name of the register.
14801 t -- likewise, print the V8SFmode name of the register.
14802 g -- likewise, print the V16SFmode name of the register.
14803 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14804 y -- print "st(0)" instead of "st" as a register.
14805 d -- print duplicated register operand for AVX instruction.
14806 D -- print condition for SSE cmp instruction.
14807 P -- if PIC, print an @PLT suffix.
14808 p -- print raw symbol name.
14809 X -- don't print any sort of PIC '@' suffix for a symbol.
14810 & -- print some in-use local-dynamic symbol name.
14811 H -- print a memory address offset by 8; used for sse high-parts
14812 Y -- print condition for XOP pcom* instruction.
14813 + -- print a branch hint as 'cs' or 'ds' prefix
14814 ; -- print a semicolon (after prefixes due to bug in older gas).
14815 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14816 @ -- print a segment register of thread base pointer load
14817 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
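For example, if operands[0] is in the rax register, %q0 prints rax,
%k0 prints eax, %w0 prints ax, %b0 prints al and %h0 prints ah
(each with a '%' prefix in AT&T syntax).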
14820 void
14821 ix86_print_operand (FILE *file, rtx x, int code)
14823 if (code)
14825 switch (code)
14827 case 'A':
14828 switch (ASSEMBLER_DIALECT)
14830 case ASM_ATT:
14831 putc ('*', file);
14832 break;
14834 case ASM_INTEL:
14835 /* Intel syntax. For absolute addresses, registers should not
14836 be surrounded by brackets. */
14837 if (!REG_P (x))
14839 putc ('[', file);
14840 ix86_print_operand (file, x, 0);
14841 putc (']', file);
14842 return;
14844 break;
14846 default:
14847 gcc_unreachable ();
14850 ix86_print_operand (file, x, 0);
14851 return;
14853 case 'E':
14854 /* Wrap address in an UNSPEC to declare special handling. */
14855 if (TARGET_64BIT)
14856 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14858 output_address (x);
14859 return;
14861 case 'L':
14862 if (ASSEMBLER_DIALECT == ASM_ATT)
14863 putc ('l', file);
14864 return;
14866 case 'W':
14867 if (ASSEMBLER_DIALECT == ASM_ATT)
14868 putc ('w', file);
14869 return;
14871 case 'B':
14872 if (ASSEMBLER_DIALECT == ASM_ATT)
14873 putc ('b', file);
14874 return;
14876 case 'Q':
14877 if (ASSEMBLER_DIALECT == ASM_ATT)
14878 putc ('l', file);
14879 return;
14881 case 'S':
14882 if (ASSEMBLER_DIALECT == ASM_ATT)
14883 putc ('s', file);
14884 return;
14886 case 'T':
14887 if (ASSEMBLER_DIALECT == ASM_ATT)
14888 putc ('t', file);
14889 return;
14891 case 'O':
14892 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14893 if (ASSEMBLER_DIALECT != ASM_ATT)
14894 return;
14896 switch (GET_MODE_SIZE (GET_MODE (x)))
14898 case 2:
14899 putc ('w', file);
14900 break;
14902 case 4:
14903 putc ('l', file);
14904 break;
14906 case 8:
14907 putc ('q', file);
14908 break;
14910 default:
14911 output_operand_lossage
14912 ("invalid operand size for operand code 'O'");
14913 return;
14916 putc ('.', file);
14917 #endif
14918 return;
14920 case 'z':
14921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14923 /* Opcodes don't get size suffixes if using Intel opcodes. */
14924 if (ASSEMBLER_DIALECT == ASM_INTEL)
14925 return;
14927 switch (GET_MODE_SIZE (GET_MODE (x)))
14929 case 1:
14930 putc ('b', file);
14931 return;
14933 case 2:
14934 putc ('w', file);
14935 return;
14937 case 4:
14938 putc ('l', file);
14939 return;
14941 case 8:
14942 putc ('q', file);
14943 return;
14945 default:
14946 output_operand_lossage
14947 ("invalid operand size for operand code 'z'");
14948 return;
14952 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14953 warning
14954 (0, "non-integer operand used with operand code 'z'");
14955 /* FALLTHRU */
14957 case 'Z':
14958 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14959 if (ASSEMBLER_DIALECT == ASM_INTEL)
14960 return;
14962 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14964 switch (GET_MODE_SIZE (GET_MODE (x)))
14966 case 2:
14967 #ifdef HAVE_AS_IX86_FILDS
14968 putc ('s', file);
14969 #endif
14970 return;
14972 case 4:
14973 putc ('l', file);
14974 return;
14976 case 8:
14977 #ifdef HAVE_AS_IX86_FILDQ
14978 putc ('q', file);
14979 #else
14980 fputs ("ll", file);
14981 #endif
14982 return;
14984 default:
14985 break;
14988 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14990 /* 387 opcodes don't get size suffixes
14991 if the operands are registers. */
14992 if (STACK_REG_P (x))
14993 return;
14995 switch (GET_MODE_SIZE (GET_MODE (x)))
14997 case 4:
14998 putc ('s', file);
14999 return;
15001 case 8:
15002 putc ('l', file);
15003 return;
15005 case 12:
15006 case 16:
15007 putc ('t', file);
15008 return;
15010 default:
15011 break;
15014 else
15016 output_operand_lossage
15017 ("invalid operand type used with operand code 'Z'");
15018 return;
15021 output_operand_lossage
15022 ("invalid operand size for operand code 'Z'");
15023 return;
15025 case 'd':
15026 case 'b':
15027 case 'w':
15028 case 'k':
15029 case 'q':
15030 case 'h':
15031 case 't':
15032 case 'g':
15033 case 'y':
15034 case 'x':
15035 case 'X':
15036 case 'P':
15037 case 'p':
15038 break;
15040 case 's':
15041 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15043 ix86_print_operand (file, x, 0);
15044 fputs (", ", file);
15046 return;
15048 case 'Y':
15049 switch (GET_CODE (x))
15051 case NE:
15052 fputs ("neq", file);
15053 break;
15054 case EQ:
15055 fputs ("eq", file);
15056 break;
15057 case GE:
15058 case GEU:
15059 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15060 break;
15061 case GT:
15062 case GTU:
15063 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15064 break;
15065 case LE:
15066 case LEU:
15067 fputs ("le", file);
15068 break;
15069 case LT:
15070 case LTU:
15071 fputs ("lt", file);
15072 break;
15073 case UNORDERED:
15074 fputs ("unord", file);
15075 break;
15076 case ORDERED:
15077 fputs ("ord", file);
15078 break;
15079 case UNEQ:
15080 fputs ("ueq", file);
15081 break;
15082 case UNGE:
15083 fputs ("nlt", file);
15084 break;
15085 case UNGT:
15086 fputs ("nle", file);
15087 break;
15088 case UNLE:
15089 fputs ("ule", file);
15090 break;
15091 case UNLT:
15092 fputs ("ult", file);
15093 break;
15094 case LTGT:
15095 fputs ("une", file);
15096 break;
15097 default:
15098 output_operand_lossage ("operand is not a condition code, "
15099 "invalid operand code 'Y'");
15100 return;
15102 return;
15104 case 'D':
15105 /* A little bit of brain damage here. The SSE compare instructions
15106 use completely different names for the comparisons than the
15107 fp conditional moves do. */
15108 switch (GET_CODE (x))
15110 case UNEQ:
15111 if (TARGET_AVX)
15113 fputs ("eq_us", file);
15114 break;
15116 case EQ:
15117 fputs ("eq", file);
15118 break;
15119 case UNLT:
15120 if (TARGET_AVX)
15122 fputs ("nge", file);
15123 break;
15125 case LT:
15126 fputs ("lt", file);
15127 break;
15128 case UNLE:
15129 if (TARGET_AVX)
15131 fputs ("ngt", file);
15132 break;
15134 case LE:
15135 fputs ("le", file);
15136 break;
15137 case UNORDERED:
15138 fputs ("unord", file);
15139 break;
15140 case LTGT:
15141 if (TARGET_AVX)
15143 fputs ("neq_oq", file);
15144 break;
15146 case NE:
15147 fputs ("neq", file);
15148 break;
15149 case GE:
15150 if (TARGET_AVX)
15152 fputs ("ge", file);
15153 break;
15155 case UNGE:
15156 fputs ("nlt", file);
15157 break;
15158 case GT:
15159 if (TARGET_AVX)
15161 fputs ("gt", file);
15162 break;
15164 case UNGT:
15165 fputs ("nle", file);
15166 break;
15167 case ORDERED:
15168 fputs ("ord", file);
15169 break;
15170 default:
15171 output_operand_lossage ("operand is not a condition code, "
15172 "invalid operand code 'D'");
15173 return;
15175 return;
15177 case 'F':
15178 case 'f':
15179 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15180 if (ASSEMBLER_DIALECT == ASM_ATT)
15181 putc ('.', file);
15182 #endif
15184 case 'C':
15185 case 'c':
15186 if (!COMPARISON_P (x))
15188 output_operand_lossage ("operand is not a condition code, "
15189 "invalid operand code '%c'", code);
15190 return;
15192 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15193 code == 'c' || code == 'f',
15194 code == 'F' || code == 'f',
15195 file);
15196 return;
15198 case 'H':
15199 if (!offsettable_memref_p (x))
15201 output_operand_lossage ("operand is not an offsettable memory "
15202 "reference, invalid operand code 'H'");
15203 return;
15205 /* It doesn't actually matter what mode we use here, as we're
15206 only going to use this for printing. */
15207 x = adjust_address_nv (x, DImode, 8);
15208 /* Output 'qword ptr' for intel assembler dialect. */
15209 if (ASSEMBLER_DIALECT == ASM_INTEL)
15210 code = 'q';
15211 break;
15213 case 'K':
15214 gcc_assert (CONST_INT_P (x));
15216 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15217 #ifdef HAVE_AS_IX86_HLE
15218 fputs ("xacquire ", file);
15219 #else
15220 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15221 #endif
15222 else if (INTVAL (x) & IX86_HLE_RELEASE)
15223 #ifdef HAVE_AS_IX86_HLE
15224 fputs ("xrelease ", file);
15225 #else
15226 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15227 #endif
15228 /* We do not want to print the value of the operand. */
15229 return;
15231 case 'N':
15232 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15233 fputs ("{z}", file);
15234 return;
15236 case 'r':
15237 gcc_assert (CONST_INT_P (x));
15238 gcc_assert (INTVAL (x) == ROUND_SAE);
15240 if (ASSEMBLER_DIALECT == ASM_INTEL)
15241 fputs (", ", file);
15243 fputs ("{sae}", file);
15245 if (ASSEMBLER_DIALECT == ASM_ATT)
15246 fputs (", ", file);
15248 return;
15250 case 'R':
15251 gcc_assert (CONST_INT_P (x));
15253 if (ASSEMBLER_DIALECT == ASM_INTEL)
15254 fputs (", ", file);
15256 switch (INTVAL (x))
15258 case ROUND_NEAREST_INT | ROUND_SAE:
15259 fputs ("{rn-sae}", file);
15260 break;
15261 case ROUND_NEG_INF | ROUND_SAE:
15262 fputs ("{rd-sae}", file);
15263 break;
15264 case ROUND_POS_INF | ROUND_SAE:
15265 fputs ("{ru-sae}", file);
15266 break;
15267 case ROUND_ZERO | ROUND_SAE:
15268 fputs ("{rz-sae}", file);
15269 break;
15270 default:
15271 gcc_unreachable ();
15274 if (ASSEMBLER_DIALECT == ASM_ATT)
15275 fputs (", ", file);
15277 return;
15279 case '*':
15280 if (ASSEMBLER_DIALECT == ASM_ATT)
15281 putc ('*', file);
15282 return;
15284 case '&':
15286 const char *name = get_some_local_dynamic_name ();
15287 if (name == NULL)
15288 output_operand_lossage ("'%%&' used without any "
15289 "local dynamic TLS references");
15290 else
15291 assemble_name (file, name);
15292 return;
15295 case '+':
15297 rtx x;
15299 if (!optimize
15300 || optimize_function_for_size_p (cfun)
15301 || !TARGET_BRANCH_PREDICTION_HINTS)
15302 return;
15304 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15305 if (x)
15307 int pred_val = XINT (x, 0);
15309 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15310 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15312 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15313 bool cputaken
15314 = final_forward_branch_p (current_output_insn) == 0;
15316 /* Emit hints only in the case default branch prediction
15317 heuristics would fail. */
15318 if (taken != cputaken)
15320 /* We use 3e (DS) prefix for taken branches and
15321 2e (CS) prefix for not taken branches. */
15322 if (taken)
15323 fputs ("ds ; ", file);
15324 else
15325 fputs ("cs ; ", file);
15329 return;
15332 case ';':
15333 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15334 putc (';', file);
15335 #endif
15336 return;
15338 case '@':
15339 if (ASSEMBLER_DIALECT == ASM_ATT)
15340 putc ('%', file);
15342 /* The kernel uses a different segment register for performance
15343 reasons, so that a system call does not have to trash the userspace
15344 segment register, which would be expensive. */
15345 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15346 fputs ("fs", file);
15347 else
15348 fputs ("gs", file);
15349 return;
15351 case '~':
15352 putc (TARGET_AVX2 ? 'i' : 'f', file);
15353 return;
15355 case '^':
15356 if (TARGET_64BIT && Pmode != word_mode)
15357 fputs ("addr32 ", file);
15358 return;
15360 default:
15361 output_operand_lossage ("invalid operand code '%c'", code);
15365 if (REG_P (x))
15366 print_reg (x, code, file);
15368 else if (MEM_P (x))
15370 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15371 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15372 && GET_MODE (x) != BLKmode)
15374 const char * size;
15375 switch (GET_MODE_SIZE (GET_MODE (x)))
15377 case 1: size = "BYTE"; break;
15378 case 2: size = "WORD"; break;
15379 case 4: size = "DWORD"; break;
15380 case 8: size = "QWORD"; break;
15381 case 12: size = "TBYTE"; break;
15382 case 16:
15383 if (GET_MODE (x) == XFmode)
15384 size = "TBYTE";
15385 else
15386 size = "XMMWORD";
15387 break;
15388 case 32: size = "YMMWORD"; break;
15389 case 64: size = "ZMMWORD"; break;
15390 default:
15391 gcc_unreachable ();
15394 /* Check for explicit size override (codes 'b', 'w', 'k',
15395 'q' and 'x') */
15396 if (code == 'b')
15397 size = "BYTE";
15398 else if (code == 'w')
15399 size = "WORD";
15400 else if (code == 'k')
15401 size = "DWORD";
15402 else if (code == 'q')
15403 size = "QWORD";
15404 else if (code == 'x')
15405 size = "XMMWORD";
15407 fputs (size, file);
15408 fputs (" PTR ", file);
15411 x = XEXP (x, 0);
15412 /* Avoid (%rip) for call operands. */
15413 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15414 && !CONST_INT_P (x))
15415 output_addr_const (file, x);
15416 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15417 output_operand_lossage ("invalid constraints for operand");
15418 else
15419 output_address (x);
15422 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15424 REAL_VALUE_TYPE r;
15425 long l;
15427 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15428 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15430 if (ASSEMBLER_DIALECT == ASM_ATT)
15431 putc ('$', file);
15432 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15433 if (code == 'q')
15434 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15435 (unsigned long long) (int) l);
15436 else
15437 fprintf (file, "0x%08x", (unsigned int) l);
15440 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15442 REAL_VALUE_TYPE r;
15443 long l[2];
15445 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15446 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15448 if (ASSEMBLER_DIALECT == ASM_ATT)
15449 putc ('$', file);
15450 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15453 /* These float cases don't actually occur as immediate operands. */
15454 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15456 char dstr[30];
15458 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15459 fputs (dstr, file);
15462 else
15464 /* We have patterns that allow zero sets of memory, for instance.
15465 In 64-bit mode, we should probably support all 8-byte vectors,
15466 since we can in fact encode that into an immediate. */
15467 if (GET_CODE (x) == CONST_VECTOR)
15469 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15470 x = const0_rtx;
15473 if (code != 'P' && code != 'p')
15475 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15477 if (ASSEMBLER_DIALECT == ASM_ATT)
15478 putc ('$', file);
15480 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15481 || GET_CODE (x) == LABEL_REF)
15483 if (ASSEMBLER_DIALECT == ASM_ATT)
15484 putc ('$', file);
15485 else
15486 fputs ("OFFSET FLAT:", file);
15489 if (CONST_INT_P (x))
15490 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15491 else if (flag_pic || MACHOPIC_INDIRECT)
15492 output_pic_addr_const (file, x, code);
15493 else
15494 output_addr_const (file, x);
15498 static bool
15499 ix86_print_operand_punct_valid_p (unsigned char code)
15501 return (code == '@' || code == '*' || code == '+' || code == '&'
15502 || code == ';' || code == '~' || code == '^');
15505 /* Print a memory operand whose address is ADDR. */
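/* For example, an address with base %eax, index %ebx, scale 4 and
   displacement 16 prints as "16(%eax,%ebx,4)" in AT&T syntax and as
   "[eax+16+ebx*4]" in Intel syntax.  */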
15507 static void
15508 ix86_print_operand_address (FILE *file, rtx addr)
15510 struct ix86_address parts;
15511 rtx base, index, disp;
15512 int scale;
15513 int ok;
15514 bool vsib = false;
15515 int code = 0;
15517 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15519 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15520 gcc_assert (parts.index == NULL_RTX);
15521 parts.index = XVECEXP (addr, 0, 1);
15522 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15523 addr = XVECEXP (addr, 0, 0);
15524 vsib = true;
15526 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15528 gcc_assert (TARGET_64BIT);
15529 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15530 code = 'q';
15532 else
15533 ok = ix86_decompose_address (addr, &parts);
15535 gcc_assert (ok);
15537 base = parts.base;
15538 index = parts.index;
15539 disp = parts.disp;
15540 scale = parts.scale;
15542 switch (parts.seg)
15544 case SEG_DEFAULT:
15545 break;
15546 case SEG_FS:
15547 case SEG_GS:
15548 if (ASSEMBLER_DIALECT == ASM_ATT)
15549 putc ('%', file);
15550 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15551 break;
15552 default:
15553 gcc_unreachable ();
15556 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15557 if (TARGET_64BIT && !base && !index)
15559 rtx symbol = disp;
15561 if (GET_CODE (disp) == CONST
15562 && GET_CODE (XEXP (disp, 0)) == PLUS
15563 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15564 symbol = XEXP (XEXP (disp, 0), 0);
15566 if (GET_CODE (symbol) == LABEL_REF
15567 || (GET_CODE (symbol) == SYMBOL_REF
15568 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15569 base = pc_rtx;
15571 if (!base && !index)
15573 /* A displacement-only address requires special attention. */
15575 if (CONST_INT_P (disp))
15577 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15578 fputs ("ds:", file);
15579 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15581 else if (flag_pic)
15582 output_pic_addr_const (file, disp, 0);
15583 else
15584 output_addr_const (file, disp);
15586 else
15588 /* Print SImode register names to force addr32 prefix. */
15589 if (SImode_address_operand (addr, VOIDmode))
15591 #ifdef ENABLE_CHECKING
15592 gcc_assert (TARGET_64BIT);
15593 switch (GET_CODE (addr))
15595 case SUBREG:
15596 gcc_assert (GET_MODE (addr) == SImode);
15597 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15598 break;
15599 case ZERO_EXTEND:
15600 case AND:
15601 gcc_assert (GET_MODE (addr) == DImode);
15602 break;
15603 default:
15604 gcc_unreachable ();
15606 #endif
15607 gcc_assert (!code);
15608 code = 'k';
15610 else if (code == 0
15611 && TARGET_X32
15612 && disp
15613 && CONST_INT_P (disp)
15614 && INTVAL (disp) < -16*1024*1024)
15616 /* X32 runs in 64-bit mode, where displacement, DISP, in
15617 address DISP(%r64), is encoded as 32-bit immediate sign-
15618 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15619 address is %r64 + 0xffffffffbffffd00. When %r64 <
15620 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15621 which is invalid for x32. The correct address is %r64
15622 - 0x40000300 == 0xf7ffdd64. To properly encode
15623 -0x40000300(%r64) for x32, we zero-extend negative
15624 displacement by forcing addr32 prefix which truncates
15625 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15626 zero-extend all negative displacements, including -1(%rsp).
15627 However, for small negative displacements, sign-extension
15628 won't cause overflow. We only zero-extend negative
15629 displacements if they < -16*1024*1024, which is also used
15630 to check legitimate address displacements for PIC. */
15631 code = 'k';
15634 if (ASSEMBLER_DIALECT == ASM_ATT)
15636 if (disp)
15638 if (flag_pic)
15639 output_pic_addr_const (file, disp, 0);
15640 else if (GET_CODE (disp) == LABEL_REF)
15641 output_asm_label (disp);
15642 else
15643 output_addr_const (file, disp);
15646 putc ('(', file);
15647 if (base)
15648 print_reg (base, code, file);
15649 if (index)
15651 putc (',', file);
15652 print_reg (index, vsib ? 0 : code, file);
15653 if (scale != 1 || vsib)
15654 fprintf (file, ",%d", scale);
15656 putc (')', file);
15658 else
15660 rtx offset = NULL_RTX;
15662 if (disp)
15664 /* Pull out the offset of a symbol; print any symbol itself. */
15665 if (GET_CODE (disp) == CONST
15666 && GET_CODE (XEXP (disp, 0)) == PLUS
15667 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15669 offset = XEXP (XEXP (disp, 0), 1);
15670 disp = gen_rtx_CONST (VOIDmode,
15671 XEXP (XEXP (disp, 0), 0));
15674 if (flag_pic)
15675 output_pic_addr_const (file, disp, 0);
15676 else if (GET_CODE (disp) == LABEL_REF)
15677 output_asm_label (disp);
15678 else if (CONST_INT_P (disp))
15679 offset = disp;
15680 else
15681 output_addr_const (file, disp);
15684 putc ('[', file);
15685 if (base)
15687 print_reg (base, code, file);
15688 if (offset)
15690 if (INTVAL (offset) >= 0)
15691 putc ('+', file);
15692 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15695 else if (offset)
15696 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15697 else
15698 putc ('0', file);
15700 if (index)
15702 putc ('+', file);
15703 print_reg (index, vsib ? 0 : code, file);
15704 if (scale != 1 || vsib)
15705 fprintf (file, "*%d", scale);
15707 putc (']', file);
15712 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15714 static bool
15715 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15717 rtx op;
15719 if (GET_CODE (x) != UNSPEC)
15720 return false;
15722 op = XVECEXP (x, 0, 0);
15723 switch (XINT (x, 1))
15725 case UNSPEC_GOTTPOFF:
15726 output_addr_const (file, op);
15727 /* FIXME: This might be @TPOFF in Sun ld. */
15728 fputs ("@gottpoff", file);
15729 break;
15730 case UNSPEC_TPOFF:
15731 output_addr_const (file, op);
15732 fputs ("@tpoff", file);
15733 break;
15734 case UNSPEC_NTPOFF:
15735 output_addr_const (file, op);
15736 if (TARGET_64BIT)
15737 fputs ("@tpoff", file);
15738 else
15739 fputs ("@ntpoff", file);
15740 break;
15741 case UNSPEC_DTPOFF:
15742 output_addr_const (file, op);
15743 fputs ("@dtpoff", file);
15744 break;
15745 case UNSPEC_GOTNTPOFF:
15746 output_addr_const (file, op);
15747 if (TARGET_64BIT)
15748 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15749 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15750 else
15751 fputs ("@gotntpoff", file);
15752 break;
15753 case UNSPEC_INDNTPOFF:
15754 output_addr_const (file, op);
15755 fputs ("@indntpoff", file);
15756 break;
15757 #if TARGET_MACHO
15758 case UNSPEC_MACHOPIC_OFFSET:
15759 output_addr_const (file, op);
15760 putc ('-', file);
15761 machopic_output_function_base_name (file);
15762 break;
15763 #endif
15765 case UNSPEC_STACK_CHECK:
15767 int offset;
15769 gcc_assert (flag_split_stack);
15771 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15772 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15773 #else
15774 gcc_unreachable ();
15775 #endif
15777 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15779 break;
15781 default:
15782 return false;
15785 return true;
15788 /* Split one or more double-mode RTL references into pairs of half-mode
15789 references. The RTL can be REG, offsettable MEM, integer constant, or
15790 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15791 split and "num" is its length. lo_half and hi_half are output arrays
15792 that parallel "operands". */
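/* For example, a DImode register operand is split into two SImode
   subregs: lo_half at byte offset 0 and hi_half at byte offset 4.  */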
15794 void
15795 split_double_mode (enum machine_mode mode, rtx operands[],
15796 int num, rtx lo_half[], rtx hi_half[])
15798 enum machine_mode half_mode;
15799 unsigned int byte;
15801 switch (mode)
15803 case TImode:
15804 half_mode = DImode;
15805 break;
15806 case DImode:
15807 half_mode = SImode;
15808 break;
15809 default:
15810 gcc_unreachable ();
15813 byte = GET_MODE_SIZE (half_mode);
15815 while (num--)
15817 rtx op = operands[num];
15819 /* simplify_subreg refuses to split volatile memory addresses,
15820 but we still have to handle them. */
15821 if (MEM_P (op))
15823 lo_half[num] = adjust_address (op, half_mode, 0);
15824 hi_half[num] = adjust_address (op, half_mode, byte);
15826 else
15828 lo_half[num] = simplify_gen_subreg (half_mode, op,
15829 GET_MODE (op) == VOIDmode
15830 ? mode : GET_MODE (op), 0);
15831 hi_half[num] = simplify_gen_subreg (half_mode, op,
15832 GET_MODE (op) == VOIDmode
15833 ? mode : GET_MODE (op), byte);
15838 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15839 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15840 is the expression of the binary operation. The output may either be
15841 emitted here, or returned to the caller, like all output_* functions.
15843 There is no guarantee that the operands are the same mode, as they
15844 might be within FLOAT or FLOAT_EXTEND expressions. */
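/* For example, an SFmode addition yields "vaddss\t{%2, %1, %0|%0, %1, %2}"
   when AVX is enabled and "addss\t{%2, %0|%0, %2}" for plain SSE.  */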
15846 #ifndef SYSV386_COMPAT
15847 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15848 wants to fix the assemblers because that causes incompatibility
15849 with gcc. No-one wants to fix gcc because that causes
15850 incompatibility with assemblers... You can use the option of
15851 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15852 #define SYSV386_COMPAT 1
15853 #endif
15855 const char *
15856 output_387_binary_op (rtx insn, rtx *operands)
15858 static char buf[40];
15859 const char *p;
15860 const char *ssep;
15861 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15863 #ifdef ENABLE_CHECKING
15864 /* Even if we do not want to check the inputs, this documents input
15865 constraints. Which helps in understanding the following code. */
15866 if (STACK_REG_P (operands[0])
15867 && ((REG_P (operands[1])
15868 && REGNO (operands[0]) == REGNO (operands[1])
15869 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15870 || (REG_P (operands[2])
15871 && REGNO (operands[0]) == REGNO (operands[2])
15872 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15873 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15874 ; /* ok */
15875 else
15876 gcc_assert (is_sse);
15877 #endif
15879 switch (GET_CODE (operands[3]))
15881 case PLUS:
15882 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15883 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15884 p = "fiadd";
15885 else
15886 p = "fadd";
15887 ssep = "vadd";
15888 break;
15890 case MINUS:
15891 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15892 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15893 p = "fisub";
15894 else
15895 p = "fsub";
15896 ssep = "vsub";
15897 break;
15899 case MULT:
15900 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15901 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15902 p = "fimul";
15903 else
15904 p = "fmul";
15905 ssep = "vmul";
15906 break;
15908 case DIV:
15909 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15910 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15911 p = "fidiv";
15912 else
15913 p = "fdiv";
15914 ssep = "vdiv";
15915 break;
15917 default:
15918 gcc_unreachable ();
15921 if (is_sse)
15923 if (TARGET_AVX)
15925 strcpy (buf, ssep);
15926 if (GET_MODE (operands[0]) == SFmode)
15927 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15928 else
15929 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15931 else
15933 strcpy (buf, ssep + 1);
15934 if (GET_MODE (operands[0]) == SFmode)
15935 strcat (buf, "ss\t{%2, %0|%0, %2}");
15936 else
15937 strcat (buf, "sd\t{%2, %0|%0, %2}");
15939 return buf;
15941 strcpy (buf, p);
15943 switch (GET_CODE (operands[3]))
15945 case MULT:
15946 case PLUS:
15947 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15949 rtx temp = operands[2];
15950 operands[2] = operands[1];
15951 operands[1] = temp;
15954 /* We now know operands[0] == operands[1]. */
15956 if (MEM_P (operands[2]))
15958 p = "%Z2\t%2";
15959 break;
15962 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15964 if (STACK_TOP_P (operands[0]))
15965 /* How is it that we are storing to a dead operand[2]?
15966 Well, presumably operands[1] is dead too. We can't
15967 store the result to st(0) as st(0) gets popped on this
15968 instruction. Instead store to operands[2] (which I
15969 think has to be st(1)). st(1) will be popped later.
15970 gcc <= 2.8.1 didn't have this check and generated
15971 assembly code that the Unixware assembler rejected. */
15972 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15973 else
15974 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15975 break;
15978 if (STACK_TOP_P (operands[0]))
15979 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15980 else
15981 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15982 break;
15984 case MINUS:
15985 case DIV:
15986 if (MEM_P (operands[1]))
15988 p = "r%Z1\t%1";
15989 break;
15992 if (MEM_P (operands[2]))
15994 p = "%Z2\t%2";
15995 break;
15998 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16000 #if SYSV386_COMPAT
16001 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16002 derived assemblers, confusingly reverse the direction of
16003 the operation for fsub{r} and fdiv{r} when the
16004 destination register is not st(0). The Intel assembler
16005 doesn't have this brain damage. Read !SYSV386_COMPAT to
16006 figure out what the hardware really does. */
16007 if (STACK_TOP_P (operands[0]))
16008 p = "{p\t%0, %2|rp\t%2, %0}";
16009 else
16010 p = "{rp\t%2, %0|p\t%0, %2}";
16011 #else
16012 if (STACK_TOP_P (operands[0]))
16013 /* As above for fmul/fadd, we can't store to st(0). */
16014 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16015 else
16016 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16017 #endif
16018 break;
16021 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16023 #if SYSV386_COMPAT
16024 if (STACK_TOP_P (operands[0]))
16025 p = "{rp\t%0, %1|p\t%1, %0}";
16026 else
16027 p = "{p\t%1, %0|rp\t%0, %1}";
16028 #else
16029 if (STACK_TOP_P (operands[0]))
16030 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16031 else
16032 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16033 #endif
16034 break;
16037 if (STACK_TOP_P (operands[0]))
16039 if (STACK_TOP_P (operands[1]))
16040 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16041 else
16042 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16043 break;
16045 else if (STACK_TOP_P (operands[1]))
16047 #if SYSV386_COMPAT
16048 p = "{\t%1, %0|r\t%0, %1}";
16049 #else
16050 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16051 #endif
16053 else
16055 #if SYSV386_COMPAT
16056 p = "{r\t%2, %0|\t%0, %2}";
16057 #else
16058 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16059 #endif
16061 break;
16063 default:
16064 gcc_unreachable ();
16067 strcat (buf, p);
16068 return buf;
16071 /* Check if a 256bit AVX register is referenced inside of EXP. */
16073 static int
16074 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16076 rtx exp = *pexp;
16078 if (GET_CODE (exp) == SUBREG)
16079 exp = SUBREG_REG (exp);
16081 if (REG_P (exp)
16082 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16083 return 1;
16085 return 0;
16088 /* Return needed mode for entity in optimize_mode_switching pass. */
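/* For the AVX_U128 entity the modes are: AVX_U128_CLEAN (the upper
   halves of the ymm registers are known to be zero), AVX_U128_DIRTY
   (they may hold live data, so a vzeroupper is wanted before legacy
   SSE code or calls) and AVX_U128_ANY (no requirement).  */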
16090 static int
16091 ix86_avx_u128_mode_needed (rtx insn)
16093 if (CALL_P (insn))
16095 rtx link;
16097 /* Needed mode is set to AVX_U128_CLEAN if there are
16098 no 256bit modes used in function arguments. */
16099 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16100 link;
16101 link = XEXP (link, 1))
16103 if (GET_CODE (XEXP (link, 0)) == USE)
16105 rtx arg = XEXP (XEXP (link, 0), 0);
16107 if (ix86_check_avx256_register (&arg, NULL))
16108 return AVX_U128_DIRTY;
16112 return AVX_U128_CLEAN;
16115 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16116 changes state only when a 256bit register is written to, but we need
16117 to prevent the compiler from moving the optimal insertion point above
16118 an eventual read from a 256bit register. */
16119 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16120 return AVX_U128_DIRTY;
16122 return AVX_U128_ANY;
16125 /* Return mode that i387 must be switched into
16126 prior to the execution of insn. */
16128 static int
16129 ix86_i387_mode_needed (int entity, rtx insn)
16131 enum attr_i387_cw mode;
16133 /* The mode UNINITIALIZED is used to store the control word after a
16134 function call or ASM pattern. The mode ANY specifies that the function
16135 has no requirements on the control word and makes no changes in the
16136 bits we are interested in. */
16138 if (CALL_P (insn)
16139 || (NONJUMP_INSN_P (insn)
16140 && (asm_noperands (PATTERN (insn)) >= 0
16141 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16142 return I387_CW_UNINITIALIZED;
16144 if (recog_memoized (insn) < 0)
16145 return I387_CW_ANY;
16147 mode = get_attr_i387_cw (insn);
16149 switch (entity)
16151 case I387_TRUNC:
16152 if (mode == I387_CW_TRUNC)
16153 return mode;
16154 break;
16156 case I387_FLOOR:
16157 if (mode == I387_CW_FLOOR)
16158 return mode;
16159 break;
16161 case I387_CEIL:
16162 if (mode == I387_CW_CEIL)
16163 return mode;
16164 break;
16166 case I387_MASK_PM:
16167 if (mode == I387_CW_MASK_PM)
16168 return mode;
16169 break;
16171 default:
16172 gcc_unreachable ();
16175 return I387_CW_ANY;
16178 /* Return mode that entity must be switched into
16179 prior to the execution of insn. */
16182 ix86_mode_needed (int entity, rtx insn)
16184 switch (entity)
16186 case AVX_U128:
16187 return ix86_avx_u128_mode_needed (insn);
16188 case I387_TRUNC:
16189 case I387_FLOOR:
16190 case I387_CEIL:
16191 case I387_MASK_PM:
16192 return ix86_i387_mode_needed (entity, insn);
16193 default:
16194 gcc_unreachable ();
16196 return 0;
16199 /* Check if a 256bit AVX register is referenced in stores. */
16201 static void
16202 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16204 if (ix86_check_avx256_register (&dest, NULL))
16206 bool *used = (bool *) data;
16207 *used = true;
16211 /* Calculate mode of upper 128bit AVX registers after the insn. */
16213 static int
16214 ix86_avx_u128_mode_after (int mode, rtx insn)
16216 rtx pat = PATTERN (insn);
16218 if (vzeroupper_operation (pat, VOIDmode)
16219 || vzeroall_operation (pat, VOIDmode))
16220 return AVX_U128_CLEAN;
16222 /* We know that the state is clean after a CALL insn if the call does
16223 not set a 256bit register, i.e. the return value is not in one. */
16224 if (CALL_P (insn))
16226 bool avx_reg256_found = false;
16227 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16229 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16232 /* Otherwise, return the current mode. Remember that if the insn
16233 references AVX 256bit registers, the mode was already changed
16234 to DIRTY from MODE_NEEDED. */
16235 return mode;
16238 /* Return the mode that an insn results in. */
16241 ix86_mode_after (int entity, int mode, rtx insn)
16243 switch (entity)
16245 case AVX_U128:
16246 return ix86_avx_u128_mode_after (mode, insn);
16247 case I387_TRUNC:
16248 case I387_FLOOR:
16249 case I387_CEIL:
16250 case I387_MASK_PM:
16251 return mode;
16252 default:
16253 gcc_unreachable ();
16257 static int
16258 ix86_avx_u128_mode_entry (void)
16260 tree arg;
16262 /* Entry mode is set to AVX_U128_DIRTY if there are
16263 256bit modes used in function arguments. */
16264 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16265 arg = TREE_CHAIN (arg))
16267 rtx incoming = DECL_INCOMING_RTL (arg);
16269 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16270 return AVX_U128_DIRTY;
16273 return AVX_U128_CLEAN;
16276 /* Return a mode that ENTITY is assumed to be
16277 switched to at function entry. */
16280 ix86_mode_entry (int entity)
16282 switch (entity)
16284 case AVX_U128:
16285 return ix86_avx_u128_mode_entry ();
16286 case I387_TRUNC:
16287 case I387_FLOOR:
16288 case I387_CEIL:
16289 case I387_MASK_PM:
16290 return I387_CW_ANY;
16291 default:
16292 gcc_unreachable ();
16296 static int
16297 ix86_avx_u128_mode_exit (void)
16299 rtx reg = crtl->return_rtx;
16301 /* Exit mode is set to AVX_U128_DIRTY if there are
16302 256bit modes used in the function return register. */
16303 if (reg && ix86_check_avx256_register (&reg, NULL))
16304 return AVX_U128_DIRTY;
16306 return AVX_U128_CLEAN;
16309 /* Return a mode that ENTITY is assumed to be
16310 switched to at function exit. */
16313 ix86_mode_exit (int entity)
16315 switch (entity)
16317 case AVX_U128:
16318 return ix86_avx_u128_mode_exit ();
16319 case I387_TRUNC:
16320 case I387_FLOOR:
16321 case I387_CEIL:
16322 case I387_MASK_PM:
16323 return I387_CW_ANY;
16324 default:
16325 gcc_unreachable ();
16329 /* Output code to initialize control word copies used by trunc?f?i and
16330 rounding patterns. MODE selects the required rounding/masking setup;
16331 the current control word is saved and an adjusted copy is emitted. */
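/* In the x87 control word, bits 10-11 (mask 0x0c00) select the
   rounding mode: 00 = to nearest, 01 = toward -inf, 10 = toward +inf,
   11 = toward zero; bit 5 (0x0020) masks the precision exception.
   The constants used below set exactly those bits.  */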
16333 static void
16334 emit_i387_cw_initialization (int mode)
16336 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16337 rtx new_mode;
16339 enum ix86_stack_slot slot;
16341 rtx reg = gen_reg_rtx (HImode);
16343 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16344 emit_move_insn (reg, copy_rtx (stored_mode));
16346 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16347 || optimize_insn_for_size_p ())
16349 switch (mode)
16351 case I387_CW_TRUNC:
16352 /* round toward zero (truncate) */
16353 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16354 slot = SLOT_CW_TRUNC;
16355 break;
16357 case I387_CW_FLOOR:
16358 /* round down toward -oo */
16359 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16360 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16361 slot = SLOT_CW_FLOOR;
16362 break;
16364 case I387_CW_CEIL:
16365 /* round up toward +oo */
16366 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16367 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16368 slot = SLOT_CW_CEIL;
16369 break;
16371 case I387_CW_MASK_PM:
16372 /* mask precision exception for nearbyint() */
16373 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16374 slot = SLOT_CW_MASK_PM;
16375 break;
16377 default:
16378 gcc_unreachable ();
16381 else
16383 switch (mode)
16385 case I387_CW_TRUNC:
16386 /* round toward zero (truncate) */
16387 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16388 slot = SLOT_CW_TRUNC;
16389 break;
16391 case I387_CW_FLOOR:
16392 /* round down toward -oo */
16393 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16394 slot = SLOT_CW_FLOOR;
16395 break;
16397 case I387_CW_CEIL:
16398 /* round up toward +oo */
16399 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16400 slot = SLOT_CW_CEIL;
16401 break;
16403 case I387_CW_MASK_PM:
16404 /* mask precision exception for nearbyint() */
16405 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16406 slot = SLOT_CW_MASK_PM;
16407 break;
16409 default:
16410 gcc_unreachable ();
16414 gcc_assert (slot < MAX_386_STACK_LOCALS);
16416 new_mode = assign_386_stack_local (HImode, slot);
16417 emit_move_insn (new_mode, reg);
16420 /* Emit vzeroupper. */
16422 void
16423 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16425 int i;
16427 /* Cancel automatic vzeroupper insertion if there are
16428 live call-saved SSE registers at the insertion point. */
16430 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16431 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16432 return;
16434 if (TARGET_64BIT)
16435 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16436 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16437 return;
16439 emit_insn (gen_avx_vzeroupper ());
16442 /* Generate one or more insns to set ENTITY to MODE. */
16444 void
16445 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16447 switch (entity)
16449 case AVX_U128:
16450 if (mode == AVX_U128_CLEAN)
16451 ix86_avx_emit_vzeroupper (regs_live);
16452 break;
16453 case I387_TRUNC:
16454 case I387_FLOOR:
16455 case I387_CEIL:
16456 case I387_MASK_PM:
16457 if (mode != I387_CW_ANY
16458 && mode != I387_CW_UNINITIALIZED)
16459 emit_i387_cw_initialization (mode);
16460 break;
16461 default:
16462 gcc_unreachable ();
16466 /* Output code for INSN to convert a float to a signed int. OPERANDS
16467 are the insn operands. The output may be [HSD]Imode and the input
16468 operand may be [SDX]Fmode. */
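/* For example, an SImode store whose pattern requests a non-default
   rounding mode, with a dying stack top, emits:
     fldcw %3 ; fistpl %0 ; fldcw %2  */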
16470 const char *
16471 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16473 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16474 int dimode_p = GET_MODE (operands[0]) == DImode;
16475 int round_mode = get_attr_i387_cw (insn);
16477 /* Jump through a hoop or two for DImode, since the hardware has no
16478 non-popping instruction. We used to do this a different way, but
16479 that was somewhat fragile and broke with post-reload splitters. */
16480 if ((dimode_p || fisttp) && !stack_top_dies)
16481 output_asm_insn ("fld\t%y1", operands);
16483 gcc_assert (STACK_TOP_P (operands[1]));
16484 gcc_assert (MEM_P (operands[0]));
16485 gcc_assert (GET_MODE (operands[1]) != TFmode);
16487 if (fisttp)
16488 output_asm_insn ("fisttp%Z0\t%0", operands);
16489 else
16491 if (round_mode != I387_CW_ANY)
16492 output_asm_insn ("fldcw\t%3", operands);
16493 if (stack_top_dies || dimode_p)
16494 output_asm_insn ("fistp%Z0\t%0", operands);
16495 else
16496 output_asm_insn ("fist%Z0\t%0", operands);
16497 if (round_mode != I387_CW_ANY)
16498 output_asm_insn ("fldcw\t%2", operands);
16501 return "";
16504 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16505 have the values zero or one, indicates the ffreep insn's operand
16506 from the OPERANDS array. */
16508 static const char *
16509 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16511 if (TARGET_USE_FFREEP)
16512 #ifdef HAVE_AS_IX86_FFREEP
16513 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16514 #else
16516 static char retval[32];
16517 int regno = REGNO (operands[opno]);
16519 gcc_assert (STACK_REGNO_P (regno));
16521 regno -= FIRST_STACK_REG;
16523 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16524 return retval;
16526 #endif
16528 return opno ? "fstp\t%y1" : "fstp\t%y0";
16532 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16533 should be used. UNORDERED_P is true when fucom should be used. */
16535 const char *
16536 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16538 int stack_top_dies;
16539 rtx cmp_op0, cmp_op1;
16540 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16542 if (eflags_p)
16544 cmp_op0 = operands[0];
16545 cmp_op1 = operands[1];
16547 else
16549 cmp_op0 = operands[1];
16550 cmp_op1 = operands[2];
16553 if (is_sse)
16555 if (GET_MODE (operands[0]) == SFmode)
16556 if (unordered_p)
16557 return "%vucomiss\t{%1, %0|%0, %1}";
16558 else
16559 return "%vcomiss\t{%1, %0|%0, %1}";
16560 else
16561 if (unordered_p)
16562 return "%vucomisd\t{%1, %0|%0, %1}";
16563 else
16564 return "%vcomisd\t{%1, %0|%0, %1}";
16567 gcc_assert (STACK_TOP_P (cmp_op0));
16569 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16571 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16573 if (stack_top_dies)
16575 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16576 return output_387_ffreep (operands, 1);
16578 else
16579 return "ftst\n\tfnstsw\t%0";
16582 if (STACK_REG_P (cmp_op1)
16583 && stack_top_dies
16584 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16585 && REGNO (cmp_op1) != FIRST_STACK_REG)
16587 /* If the top of the 387 stack dies, and the other operand
16588 is also a stack register that dies, then this must be a
16589 `fcompp' float compare. */
16591 if (eflags_p)
16593 /* There is no double popping fcomi variant. Fortunately,
16594 eflags is immune from the fstp's cc clobbering. */
16595 if (unordered_p)
16596 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16597 else
16598 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16599 return output_387_ffreep (operands, 0);
16601 else
16603 if (unordered_p)
16604 return "fucompp\n\tfnstsw\t%0";
16605 else
16606 return "fcompp\n\tfnstsw\t%0";
16609 else
16611 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16613 static const char * const alt[16] =
16615 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16616 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16617 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16618 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16620 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16621 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16622 NULL,
16623 NULL,
16625 "fcomi\t{%y1, %0|%0, %y1}",
16626 "fcomip\t{%y1, %0|%0, %y1}",
16627 "fucomi\t{%y1, %0|%0, %y1}",
16628 "fucomip\t{%y1, %0|%0, %y1}",
16630 NULL,
16631 NULL,
16632 NULL,
16633 NULL
16636 int mask;
16637 const char *ret;
16639 mask = eflags_p << 3;
16640 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16641 mask |= unordered_p << 1;
16642 mask |= stack_top_dies;
16644 gcc_assert (mask < 16);
16645 ret = alt[mask];
16646 gcc_assert (ret);
16648 return ret;
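/* Worked example of the mask encoding above (illustration only): for an
   eflags-style unordered compare where the top of the stack dies,
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, which selects
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */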
16652 void
16653 ix86_output_addr_vec_elt (FILE *file, int value)
16655 const char *directive = ASM_LONG;
16657 #ifdef ASM_QUAD
16658 if (TARGET_LP64)
16659 directive = ASM_QUAD;
16660 #else
16661 gcc_assert (!TARGET_64BIT);
16662 #endif
16664 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16667 void
16668 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16670 const char *directive = ASM_LONG;
16672 #ifdef ASM_QUAD
16673 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16674 directive = ASM_QUAD;
16675 #else
16676 gcc_assert (!TARGET_64BIT);
16677 #endif
16678 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16679 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16680 fprintf (file, "%s%s%d-%s%d\n",
16681 directive, LPREFIX, value, LPREFIX, rel);
16682 else if (HAVE_AS_GOTOFF_IN_DATA)
16683 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16684 #if TARGET_MACHO
16685 else if (TARGET_MACHO)
16687 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16688 machopic_output_function_base_name (file);
16689 putc ('\n', file);
16691 #endif
16692 else
16693 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16694 GOT_SYMBOL_NAME, LPREFIX, value);
16697 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16698 for the target. */
16700 void
16701 ix86_expand_clear (rtx dest)
16703 rtx tmp;
16705 /* We play register width games, which are only valid after reload. */
16706 gcc_assert (reload_completed);
16708 /* Avoid HImode and its attendant prefix byte. */
16709 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16710 dest = gen_rtx_REG (SImode, REGNO (dest));
16711 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16713 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16714 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16716 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16717 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16720 emit_insn (tmp);
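/* Illustration (not from the original comments): the xor form, e.g.
   "xorl %eax, %eax" (2 bytes), sets the flags, which is why a CLOBBER of
   FLAGS_REG is attached above; the "movl $0, %eax" form (5 bytes)
   leaves the flags alone and needs no clobber.  */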
16723 /* X is an unchanging MEM. If it is a constant pool reference, return
16724 the constant pool rtx, else NULL. */
16727 maybe_get_pool_constant (rtx x)
16729 x = ix86_delegitimize_address (XEXP (x, 0));
16731 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16732 return get_pool_constant (x);
16734 return NULL_RTX;
16737 void
16738 ix86_expand_move (enum machine_mode mode, rtx operands[])
16740 rtx op0, op1;
16741 enum tls_model model;
16743 op0 = operands[0];
16744 op1 = operands[1];
16746 if (GET_CODE (op1) == SYMBOL_REF)
16748 rtx tmp;
16750 model = SYMBOL_REF_TLS_MODEL (op1);
16751 if (model)
16753 op1 = legitimize_tls_address (op1, model, true);
16754 op1 = force_operand (op1, op0);
16755 if (op1 == op0)
16756 return;
16757 op1 = convert_to_mode (mode, op1, 1);
16759 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16760 op1 = tmp;
16762 else if (GET_CODE (op1) == CONST
16763 && GET_CODE (XEXP (op1, 0)) == PLUS
16764 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16766 rtx addend = XEXP (XEXP (op1, 0), 1);
16767 rtx symbol = XEXP (XEXP (op1, 0), 0);
16768 rtx tmp;
16770 model = SYMBOL_REF_TLS_MODEL (symbol);
16771 if (model)
16772 tmp = legitimize_tls_address (symbol, model, true);
16773 else
16774 tmp = legitimize_pe_coff_symbol (symbol, true);
16776 if (tmp)
16778 tmp = force_operand (tmp, NULL);
16779 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16780 op0, 1, OPTAB_DIRECT);
16781 if (tmp == op0)
16782 return;
16783 op1 = convert_to_mode (mode, tmp, 1);
16787 if ((flag_pic || MACHOPIC_INDIRECT)
16788 && symbolic_operand (op1, mode))
16790 if (TARGET_MACHO && !TARGET_64BIT)
16792 #if TARGET_MACHO
16793 /* dynamic-no-pic */
16794 if (MACHOPIC_INDIRECT)
16796 rtx temp = ((reload_in_progress
16797 || ((op0 && REG_P (op0))
16798 && mode == Pmode))
16799 ? op0 : gen_reg_rtx (Pmode));
16800 op1 = machopic_indirect_data_reference (op1, temp);
16801 if (MACHOPIC_PURE)
16802 op1 = machopic_legitimize_pic_address (op1, mode,
16803 temp == op1 ? 0 : temp);
16805 if (op0 != op1 && GET_CODE (op0) != MEM)
16807 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16808 emit_insn (insn);
16809 return;
16811 if (GET_CODE (op0) == MEM)
16812 op1 = force_reg (Pmode, op1);
16813 else
16815 rtx temp = op0;
16816 if (GET_CODE (temp) != REG)
16817 temp = gen_reg_rtx (Pmode);
16818 temp = legitimize_pic_address (op1, temp);
16819 if (temp == op0)
16820 return;
16821 op1 = temp;
16823 /* dynamic-no-pic */
16824 #endif
16826 else
16828 if (MEM_P (op0))
16829 op1 = force_reg (mode, op1);
16830 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16832 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16833 op1 = legitimize_pic_address (op1, reg);
16834 if (op0 == op1)
16835 return;
16836 op1 = convert_to_mode (mode, op1, 1);
16840 else
16842 if (MEM_P (op0)
16843 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16844 || !push_operand (op0, mode))
16845 && MEM_P (op1))
16846 op1 = force_reg (mode, op1);
16848 if (push_operand (op0, mode)
16849 && ! general_no_elim_operand (op1, mode))
16850 op1 = copy_to_mode_reg (mode, op1);
16852 /* Force large constants in 64-bit compilation into a register
16853 to get them CSEed.  */
16854 if (can_create_pseudo_p ()
16855 && (mode == DImode) && TARGET_64BIT
16856 && immediate_operand (op1, mode)
16857 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16858 && !register_operand (op0, mode)
16859 && optimize)
16860 op1 = copy_to_mode_reg (mode, op1);
16862 if (can_create_pseudo_p ()
16863 && FLOAT_MODE_P (mode)
16864 && GET_CODE (op1) == CONST_DOUBLE)
16866 /* If we are loading a floating point constant to a register,
16867 force the value to memory now, since we'll get better code
16868 out the back end. */
16870 op1 = validize_mem (force_const_mem (mode, op1));
16871 if (!register_operand (op0, mode))
16873 rtx temp = gen_reg_rtx (mode);
16874 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16875 emit_move_insn (op0, temp);
16876 return;
16881 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16884 void
16885 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16887 rtx op0 = operands[0], op1 = operands[1];
16888 unsigned int align = GET_MODE_ALIGNMENT (mode);
16890 if (push_operand (op0, VOIDmode))
16891 op0 = emit_move_resolve_push (mode, op0);
16893 /* Force constants other than zero into memory. We do not know how
16894 the instructions used to build constants modify the upper 64 bits
16895 of the register; once we have that information we may be able
16896 to handle some of them more efficiently. */
16897 if (can_create_pseudo_p ()
16898 && register_operand (op0, mode)
16899 && (CONSTANT_P (op1)
16900 || (GET_CODE (op1) == SUBREG
16901 && CONSTANT_P (SUBREG_REG (op1))))
16902 && !standard_sse_constant_p (op1))
16903 op1 = validize_mem (force_const_mem (mode, op1));
16905 /* We need to check memory alignment for SSE mode since an attribute
16906 can make operands unaligned.  */
16907 if (can_create_pseudo_p ()
16908 && SSE_REG_MODE_P (mode)
16909 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16910 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16912 rtx tmp[2];
16914 /* ix86_expand_vector_move_misalign() does not like constants ... */
16915 if (CONSTANT_P (op1)
16916 || (GET_CODE (op1) == SUBREG
16917 && CONSTANT_P (SUBREG_REG (op1))))
16918 op1 = validize_mem (force_const_mem (mode, op1));
16920 /* ... nor both arguments in memory. */
16921 if (!register_operand (op0, mode)
16922 && !register_operand (op1, mode))
16923 op1 = force_reg (mode, op1);
16925 tmp[0] = op0; tmp[1] = op1;
16926 ix86_expand_vector_move_misalign (mode, tmp);
16927 return;
16930 /* Make operand1 a register if it isn't already. */
16931 if (can_create_pseudo_p ()
16932 && !register_operand (op0, mode)
16933 && !register_operand (op1, mode))
16935 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16936 return;
16939 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16942 /* Split 32-byte AVX unaligned load and store if needed. */
16944 static void
16945 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16947 rtx m;
16948 rtx (*extract) (rtx, rtx, rtx);
16949 rtx (*load_unaligned) (rtx, rtx);
16950 rtx (*store_unaligned) (rtx, rtx);
16951 enum machine_mode mode;
16953 switch (GET_MODE (op0))
16955 default:
16956 gcc_unreachable ();
16957 case V32QImode:
16958 extract = gen_avx_vextractf128v32qi;
16959 load_unaligned = gen_avx_loaddquv32qi;
16960 store_unaligned = gen_avx_storedquv32qi;
16961 mode = V16QImode;
16962 break;
16963 case V8SFmode:
16964 extract = gen_avx_vextractf128v8sf;
16965 load_unaligned = gen_avx_loadups256;
16966 store_unaligned = gen_avx_storeups256;
16967 mode = V4SFmode;
16968 break;
16969 case V4DFmode:
16970 extract = gen_avx_vextractf128v4df;
16971 load_unaligned = gen_avx_loadupd256;
16972 store_unaligned = gen_avx_storeupd256;
16973 mode = V2DFmode;
16974 break;
16977 if (MEM_P (op1))
16979 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16981 rtx r = gen_reg_rtx (mode);
16982 m = adjust_address (op1, mode, 0);
16983 emit_move_insn (r, m);
16984 m = adjust_address (op1, mode, 16);
16985 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16986 emit_move_insn (op0, r);
16988 /* Normal *mov<mode>_internal pattern will handle
16989 unaligned loads just fine if misaligned_operand
16990 is true, and without the UNSPEC it can be combined
16991 with arithmetic instructions. */
16992 else if (misaligned_operand (op1, GET_MODE (op1)))
16993 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16994 else
16995 emit_insn (load_unaligned (op0, op1));
16997 else if (MEM_P (op0))
16999 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17001 m = adjust_address (op0, mode, 0);
17002 emit_insn (extract (m, op1, const0_rtx));
17003 m = adjust_address (op0, mode, 16);
17004 emit_insn (extract (m, op1, const1_rtx));
17006 else
17007 emit_insn (store_unaligned (op0, op1));
17009 else
17010 gcc_unreachable ();
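/* Sketch of the effect (illustrative; the exact instructions depend on
   the mode and tuning): with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a
   32-byte unaligned V8SF load is done as two 16-byte halves that are
   then concatenated, typically

	vmovups     (mem), %xmm0
	vinsertf128 $1, 16(mem), %ymm0, %ymm0

   and a split store symmetrically uses vextractf128 for the upper
   half.  */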
17013 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17014 straight to ix86_expand_vector_move. */
17015 /* Code generation for scalar reg-reg moves of single and double precision data:
17016 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17017 movaps reg, reg
17018 else
17019 movss reg, reg
17020 if (x86_sse_partial_reg_dependency == true)
17021 movapd reg, reg
17022 else
17023 movsd reg, reg
17025 Code generation for scalar loads of double precision data:
17026 if (x86_sse_split_regs == true)
17027 movlpd mem, reg (gas syntax)
17028 else
17029 movsd mem, reg
17031 Code generation for unaligned packed loads of single precision data
17032 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17033 if (x86_sse_unaligned_move_optimal)
17034 movups mem, reg
17036 if (x86_sse_partial_reg_dependency == true)
17038 xorps reg, reg
17039 movlps mem, reg
17040 movhps mem+8, reg
17042 else
17044 movlps mem, reg
17045 movhps mem+8, reg
17048 Code generation for unaligned packed loads of double precision data
17049 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17050 if (x86_sse_unaligned_move_optimal)
17051 movupd mem, reg
17053 if (x86_sse_split_regs == true)
17055 movlpd mem, reg
17056 movhpd mem+8, reg
17058 else
17060 movsd mem, reg
17061 movhpd mem+8, reg
17065 void
17066 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17068 rtx op0, op1, orig_op0 = NULL_RTX, m;
17069 rtx (*load_unaligned) (rtx, rtx);
17070 rtx (*store_unaligned) (rtx, rtx);
17072 op0 = operands[0];
17073 op1 = operands[1];
17075 if (GET_MODE_SIZE (mode) == 64)
17077 switch (GET_MODE_CLASS (mode))
17079 case MODE_VECTOR_INT:
17080 case MODE_INT:
17081 if (GET_MODE (op0) != V16SImode)
17083 if (!MEM_P (op0))
17085 orig_op0 = op0;
17086 op0 = gen_reg_rtx (V16SImode);
17088 else
17089 op0 = gen_lowpart (V16SImode, op0);
17091 op1 = gen_lowpart (V16SImode, op1);
17092 /* FALLTHRU */
17094 case MODE_VECTOR_FLOAT:
17095 switch (GET_MODE (op0))
17097 default:
17098 gcc_unreachable ();
17099 case V16SImode:
17100 load_unaligned = gen_avx512f_loaddquv16si;
17101 store_unaligned = gen_avx512f_storedquv16si;
17102 break;
17103 case V16SFmode:
17104 load_unaligned = gen_avx512f_loadups512;
17105 store_unaligned = gen_avx512f_storeups512;
17106 break;
17107 case V8DFmode:
17108 load_unaligned = gen_avx512f_loadupd512;
17109 store_unaligned = gen_avx512f_storeupd512;
17110 break;
17113 if (MEM_P (op1))
17114 emit_insn (load_unaligned (op0, op1));
17115 else if (MEM_P (op0))
17116 emit_insn (store_unaligned (op0, op1));
17117 else
17118 gcc_unreachable ();
17119 if (orig_op0)
17120 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17121 break;
17123 default:
17124 gcc_unreachable ();
17127 return;
17130 if (TARGET_AVX
17131 && GET_MODE_SIZE (mode) == 32)
17133 switch (GET_MODE_CLASS (mode))
17135 case MODE_VECTOR_INT:
17136 case MODE_INT:
17137 if (GET_MODE (op0) != V32QImode)
17139 if (!MEM_P (op0))
17141 orig_op0 = op0;
17142 op0 = gen_reg_rtx (V32QImode);
17144 else
17145 op0 = gen_lowpart (V32QImode, op0);
17147 op1 = gen_lowpart (V32QImode, op1);
17148 /* FALLTHRU */
17150 case MODE_VECTOR_FLOAT:
17151 ix86_avx256_split_vector_move_misalign (op0, op1);
17152 if (orig_op0)
17153 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17154 break;
17156 default:
17157 gcc_unreachable ();
17160 return;
17163 if (MEM_P (op1))
17165 /* Normal *mov<mode>_internal pattern will handle
17166 unaligned loads just fine if misaligned_operand
17167 is true, and without the UNSPEC it can be combined
17168 with arithmetic instructions. */
17169 if (TARGET_AVX
17170 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17171 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17172 && misaligned_operand (op1, GET_MODE (op1)))
17173 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17174 /* ??? If we have typed data, then it would appear that using
17175 movdqu is the only way to get unaligned data loaded with
17176 integer type. */
17177 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17179 if (GET_MODE (op0) != V16QImode)
17181 orig_op0 = op0;
17182 op0 = gen_reg_rtx (V16QImode);
17184 op1 = gen_lowpart (V16QImode, op1);
17185 /* We will eventually emit movups based on insn attributes. */
17186 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17187 if (orig_op0)
17188 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17190 else if (TARGET_SSE2 && mode == V2DFmode)
17192 rtx zero;
17194 if (TARGET_AVX
17195 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17196 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17197 || optimize_insn_for_size_p ())
17199 /* We will eventually emit movups based on insn attributes. */
17200 emit_insn (gen_sse2_loadupd (op0, op1));
17201 return;
17204 /* When SSE registers are split into halves, we can avoid
17205 writing to the top half twice. */
17206 if (TARGET_SSE_SPLIT_REGS)
17208 emit_clobber (op0);
17209 zero = op0;
17211 else
17213 /* ??? Not sure about the best option for the Intel chips.
17214 The following would seem to satisfy; the register is
17215 entirely cleared, breaking the dependency chain. We
17216 then store to the upper half, with a dependency depth
17217 of one. A rumor has it that Intel recommends two movsd
17218 followed by an unpacklpd, but this is unconfirmed. And
17219 given that the dependency depth of the unpacklpd would
17220 still be one, I'm not sure why this would be better. */
17221 zero = CONST0_RTX (V2DFmode);
17224 m = adjust_address (op1, DFmode, 0);
17225 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17226 m = adjust_address (op1, DFmode, 8);
17227 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17229 else
17231 rtx t;
17233 if (TARGET_AVX
17234 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17235 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17236 || optimize_insn_for_size_p ())
17238 if (GET_MODE (op0) != V4SFmode)
17240 orig_op0 = op0;
17241 op0 = gen_reg_rtx (V4SFmode);
17243 op1 = gen_lowpart (V4SFmode, op1);
17244 emit_insn (gen_sse_loadups (op0, op1));
17245 if (orig_op0)
17246 emit_move_insn (orig_op0,
17247 gen_lowpart (GET_MODE (orig_op0), op0));
17248 return;
17251 if (mode != V4SFmode)
17252 t = gen_reg_rtx (V4SFmode);
17253 else
17254 t = op0;
17256 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17257 emit_move_insn (t, CONST0_RTX (V4SFmode));
17258 else
17259 emit_clobber (t);
17261 m = adjust_address (op1, V2SFmode, 0);
17262 emit_insn (gen_sse_loadlps (t, t, m));
17263 m = adjust_address (op1, V2SFmode, 8);
17264 emit_insn (gen_sse_loadhps (t, t, m));
17265 if (mode != V4SFmode)
17266 emit_move_insn (op0, gen_lowpart (mode, t));
17269 else if (MEM_P (op0))
17271 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17273 op0 = gen_lowpart (V16QImode, op0);
17274 op1 = gen_lowpart (V16QImode, op1);
17275 /* We will eventually emit movups based on insn attributes. */
17276 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17278 else if (TARGET_SSE2 && mode == V2DFmode)
17280 if (TARGET_AVX
17281 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17282 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17283 || optimize_insn_for_size_p ())
17284 /* We will eventually emit movups based on insn attributes. */
17285 emit_insn (gen_sse2_storeupd (op0, op1));
17286 else
17288 m = adjust_address (op0, DFmode, 0);
17289 emit_insn (gen_sse2_storelpd (m, op1));
17290 m = adjust_address (op0, DFmode, 8);
17291 emit_insn (gen_sse2_storehpd (m, op1));
17294 else
17296 if (mode != V4SFmode)
17297 op1 = gen_lowpart (V4SFmode, op1);
17299 if (TARGET_AVX
17300 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17301 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17302 || optimize_insn_for_size_p ())
17304 op0 = gen_lowpart (V4SFmode, op0);
17305 emit_insn (gen_sse_storeups (op0, op1));
17307 else
17309 m = adjust_address (op0, V2SFmode, 0);
17310 emit_insn (gen_sse_storelps (m, op1));
17311 m = adjust_address (op0, V2SFmode, 8);
17312 emit_insn (gen_sse_storehps (m, op1));
17316 else
17317 gcc_unreachable ();
17320 /* Helper function of ix86_fixup_binary_operands to canonicalize
17321 operand order. Returns true if the operands should be swapped. */
17323 static bool
17324 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17325 rtx operands[])
17327 rtx dst = operands[0];
17328 rtx src1 = operands[1];
17329 rtx src2 = operands[2];
17331 /* If the operation is not commutative, we can't do anything. */
17332 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17333 return false;
17335 /* Highest priority is that src1 should match dst. */
17336 if (rtx_equal_p (dst, src1))
17337 return false;
17338 if (rtx_equal_p (dst, src2))
17339 return true;
17341 /* Next highest priority is that immediate constants come second. */
17342 if (immediate_operand (src2, mode))
17343 return false;
17344 if (immediate_operand (src1, mode))
17345 return true;
17347 /* Lowest priority is that memory references should come second. */
17348 if (MEM_P (src2))
17349 return false;
17350 if (MEM_P (src1))
17351 return true;
17353 return false;
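/* Example (illustrative): for "a = 5 + a" the operands are swapped so
   that src1 matches the destination and the immediate constant comes
   second, allowing the insn to be emitted as a single "addl $5, a".  */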
17357 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17358 destination to use for the operation. If different from the true
17359 destination in operands[0], a copy operation will be required. */
17362 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17363 rtx operands[])
17365 rtx dst = operands[0];
17366 rtx src1 = operands[1];
17367 rtx src2 = operands[2];
17369 /* Canonicalize operand order. */
17370 if (ix86_swap_binary_operands_p (code, mode, operands))
17372 rtx temp;
17374 /* It is invalid to swap operands of different modes. */
17375 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17377 temp = src1;
17378 src1 = src2;
17379 src2 = temp;
17382 /* Both source operands cannot be in memory. */
17383 if (MEM_P (src1) && MEM_P (src2))
17385 /* Optimization: Only read from memory once. */
17386 if (rtx_equal_p (src1, src2))
17388 src2 = force_reg (mode, src2);
17389 src1 = src2;
17391 else if (rtx_equal_p (dst, src1))
17392 src2 = force_reg (mode, src2);
17393 else
17394 src1 = force_reg (mode, src1);
17397 /* If the destination is memory, and we do not have matching source
17398 operands, do things in registers. */
17399 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17400 dst = gen_reg_rtx (mode);
17402 /* Source 1 cannot be a constant. */
17403 if (CONSTANT_P (src1))
17404 src1 = force_reg (mode, src1);
17406 /* Source 1 cannot be a non-matching memory. */
17407 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17408 src1 = force_reg (mode, src1);
17410 /* Improve address combine. */
17411 if (code == PLUS
17412 && GET_MODE_CLASS (mode) == MODE_INT
17413 && MEM_P (src2))
17414 src2 = force_reg (mode, src2);
17416 operands[1] = src1;
17417 operands[2] = src2;
17418 return dst;
17421 /* Similarly, but assume that the destination has already been
17422 set up properly. */
17424 void
17425 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17426 enum machine_mode mode, rtx operands[])
17428 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17429 gcc_assert (dst == operands[0]);
17432 /* Attempt to expand a binary operator. Make the expansion closer to the
17433 actual machine, than just general_operand, which will allow 3 separate
17434 memory references (one output, two inputs) in a single insn.  */
17436 void
17437 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17438 rtx operands[])
17440 rtx src1, src2, dst, op, clob;
17442 dst = ix86_fixup_binary_operands (code, mode, operands);
17443 src1 = operands[1];
17444 src2 = operands[2];
17446 /* Emit the instruction. */
17448 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17449 if (reload_in_progress)
17451 /* Reload doesn't know about the flags register, and doesn't know that
17452 it doesn't want to clobber it. We can only do this with PLUS. */
17453 gcc_assert (code == PLUS);
17454 emit_insn (op);
17456 else if (reload_completed
17457 && code == PLUS
17458 && !rtx_equal_p (dst, src1))
17460 /* This is going to be an LEA; avoid splitting it later. */
17461 emit_insn (op);
17463 else
17465 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17466 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17469 /* Fix up the destination if needed. */
17470 if (dst != operands[0])
17471 emit_move_insn (operands[0], dst);
17474 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17475 the given OPERANDS. */
17477 void
17478 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17479 rtx operands[])
17481 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17482 if (GET_CODE (operands[1]) == SUBREG)
17484 op1 = operands[1];
17485 op2 = operands[2];
17487 else if (GET_CODE (operands[2]) == SUBREG)
17489 op1 = operands[2];
17490 op2 = operands[1];
17492 /* Optimize (__m128i) d | (__m128i) e and similar code
17493 when d and e are float vectors into a float vector logical
17494 insn.  In C/C++, without using intrinsics there is no other way
17495 to express a vector logical operation on float vectors than
17496 to cast them temporarily to integer vectors.  */
17497 if (op1
17498 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17499 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17500 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17501 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17502 && SUBREG_BYTE (op1) == 0
17503 && (GET_CODE (op2) == CONST_VECTOR
17504 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17505 && SUBREG_BYTE (op2) == 0))
17506 && can_create_pseudo_p ())
17508 rtx dst;
17509 switch (GET_MODE (SUBREG_REG (op1)))
17511 case V4SFmode:
17512 case V8SFmode:
17513 case V2DFmode:
17514 case V4DFmode:
17515 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17516 if (GET_CODE (op2) == CONST_VECTOR)
17518 op2 = gen_lowpart (GET_MODE (dst), op2);
17519 op2 = force_reg (GET_MODE (dst), op2);
17521 else
17523 op1 = operands[1];
17524 op2 = SUBREG_REG (operands[2]);
17525 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17526 op2 = force_reg (GET_MODE (dst), op2);
17528 op1 = SUBREG_REG (op1);
17529 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17530 op1 = force_reg (GET_MODE (dst), op1);
17531 emit_insn (gen_rtx_SET (VOIDmode, dst,
17532 gen_rtx_fmt_ee (code, GET_MODE (dst),
17533 op1, op2)));
17534 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17535 return;
17536 default:
17537 break;
17540 if (!nonimmediate_operand (operands[1], mode))
17541 operands[1] = force_reg (mode, operands[1]);
17542 if (!nonimmediate_operand (operands[2], mode))
17543 operands[2] = force_reg (mode, operands[2]);
17544 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17545 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17546 gen_rtx_fmt_ee (code, mode, operands[1],
17547 operands[2])));
17550 /* Return TRUE or FALSE depending on whether the binary operator meets the
17551 appropriate constraints. */
17553 bool
17554 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17555 rtx operands[3])
17557 rtx dst = operands[0];
17558 rtx src1 = operands[1];
17559 rtx src2 = operands[2];
17561 /* Both source operands cannot be in memory. */
17562 if (MEM_P (src1) && MEM_P (src2))
17563 return false;
17565 /* Canonicalize operand order for commutative operators. */
17566 if (ix86_swap_binary_operands_p (code, mode, operands))
17568 rtx temp = src1;
17569 src1 = src2;
17570 src2 = temp;
17573 /* If the destination is memory, we must have a matching source operand. */
17574 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17575 return false;
17577 /* Source 1 cannot be a constant. */
17578 if (CONSTANT_P (src1))
17579 return false;
17581 /* Source 1 cannot be a non-matching memory. */
17582 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17583 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17584 return (code == AND
17585 && (mode == HImode
17586 || mode == SImode
17587 || (TARGET_64BIT && mode == DImode))
17588 && satisfies_constraint_L (src2));
17590 return true;
17593 /* Attempt to expand a unary operator. Make the expansion closer to the
17594 actual machine, than just general_operand, which will allow 2 separate
17595 memory references (one output, one input) in a single insn. */
17597 void
17598 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17599 rtx operands[])
17601 int matching_memory;
17602 rtx src, dst, op, clob;
17604 dst = operands[0];
17605 src = operands[1];
17607 /* If the destination is memory, and we do not have matching source
17608 operands, do things in registers. */
17609 matching_memory = 0;
17610 if (MEM_P (dst))
17612 if (rtx_equal_p (dst, src))
17613 matching_memory = 1;
17614 else
17615 dst = gen_reg_rtx (mode);
17618 /* When source operand is memory, destination must match. */
17619 if (MEM_P (src) && !matching_memory)
17620 src = force_reg (mode, src);
17622 /* Emit the instruction. */
17624 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17625 if (reload_in_progress || code == NOT)
17627 /* Reload doesn't know about the flags register, and doesn't know that
17628 it doesn't want to clobber it. */
17629 gcc_assert (code == NOT);
17630 emit_insn (op);
17632 else
17634 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17635 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17638 /* Fix up the destination if needed. */
17639 if (dst != operands[0])
17640 emit_move_insn (operands[0], dst);
17643 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if the dividend and
17644 divisor are within the range [0, 255].  */
17646 void
17647 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17648 bool signed_p)
17650 rtx end_label, qimode_label;
17651 rtx insn, div, mod;
17652 rtx scratch, tmp0, tmp1, tmp2;
17653 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17654 rtx (*gen_zero_extend) (rtx, rtx);
17655 rtx (*gen_test_ccno_1) (rtx, rtx);
17657 switch (mode)
17659 case SImode:
17660 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17661 gen_test_ccno_1 = gen_testsi_ccno_1;
17662 gen_zero_extend = gen_zero_extendqisi2;
17663 break;
17664 case DImode:
17665 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17666 gen_test_ccno_1 = gen_testdi_ccno_1;
17667 gen_zero_extend = gen_zero_extendqidi2;
17668 break;
17669 default:
17670 gcc_unreachable ();
17673 end_label = gen_label_rtx ();
17674 qimode_label = gen_label_rtx ();
17676 scratch = gen_reg_rtx (mode);
17678 /* Use 8-bit unsigned divmod if dividend and divisor are within
17679 the range [0, 255].  */
17680 emit_move_insn (scratch, operands[2]);
17681 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17682 scratch, 1, OPTAB_DIRECT);
17683 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17684 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17685 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17686 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17687 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17688 pc_rtx);
17689 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17690 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17691 JUMP_LABEL (insn) = qimode_label;
17693 /* Generate original signed/unsigned divmod.  */
17694 div = gen_divmod4_1 (operands[0], operands[1],
17695 operands[2], operands[3]);
17696 emit_insn (div);
17698 /* Branch to the end. */
17699 emit_jump_insn (gen_jump (end_label));
17700 emit_barrier ();
17702 /* Generate 8bit unsigned divide. */
17703 emit_label (qimode_label);
17704 /* Don't use operands[0] for the result of the 8-bit divide since not all
17705 registers support QImode ZERO_EXTRACT.  */
17706 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17707 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17708 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17709 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17711 if (signed_p)
17713 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17714 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17716 else
17718 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17719 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17722 /* Extract remainder from AH. */
17723 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17724 if (REG_P (operands[1]))
17725 insn = emit_move_insn (operands[1], tmp1);
17726 else
17728 /* Need a new scratch register since the old one holds the result
17729 of the 8-bit divide.  */
17730 scratch = gen_reg_rtx (mode);
17731 emit_move_insn (scratch, tmp1);
17732 insn = emit_move_insn (operands[1], scratch);
17734 set_unique_reg_note (insn, REG_EQUAL, mod);
17736 /* Zero extend quotient from AL. */
17737 tmp1 = gen_lowpart (QImode, tmp0);
17738 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17739 set_unique_reg_note (insn, REG_EQUAL, div);
17741 emit_label (end_label);
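/* Runtime shape of the split above (illustration only; register choice
   is up to the allocator):

	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch
	je	.Lqimode		# both operands fit in 8 bits
	<full 32/64-bit div or idiv>
	jmp	.Lend
   .Lqimode:
	<8-bit divb: quotient in AL, remainder in AH>
   .Lend:  */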
17744 /* Whether it is OK to emit CFI directives when emitting asm code. */
17746 bool
17747 ix86_emit_cfi ()
17749 return dwarf2out_do_cfi_asm ();
17752 #define LEA_MAX_STALL (3)
17753 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17755 /* Increase given DISTANCE in half-cycles according to
17756 dependencies between PREV and NEXT instructions.
17757 Add 1 half-cycle if there is no dependency and
17758 go to the next cycle if there is some dependency.  */
17760 static unsigned int
17761 increase_distance (rtx prev, rtx next, unsigned int distance)
17763 df_ref *use_rec;
17764 df_ref *def_rec;
17766 if (!prev || !next)
17767 return distance + (distance & 1) + 2;
17769 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17770 return distance + 1;
17772 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17773 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17774 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17775 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17776 return distance + (distance & 1) + 2;
17778 return distance + 1;
17781 /* Function checks if instruction INSN defines register number
17782 REGNO1 or REGNO2. */
17784 static bool
17785 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17786 rtx insn)
17788 df_ref *def_rec;
17790 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17791 if (DF_REF_REG_DEF_P (*def_rec)
17792 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17793 && (regno1 == DF_REF_REGNO (*def_rec)
17794 || regno2 == DF_REF_REGNO (*def_rec)))
17796 return true;
17799 return false;
17802 /* Function checks if instruction INSN uses register number
17803 REGNO as part of an address expression.  */
17805 static bool
17806 insn_uses_reg_mem (unsigned int regno, rtx insn)
17808 df_ref *use_rec;
17810 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17811 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17812 return true;
17814 return false;
17817 /* Search backward for non-agu definition of register number REGNO1
17818 or register number REGNO2 in basic block starting from instruction
17819 START up to head of basic block or instruction INSN.
17821 Function puts true value into *FOUND var if definition was found
17822 and false otherwise.
17824 Distance in half-cycles between START and found instruction or head
17825 of BB is added to DISTANCE and returned. */
17827 static int
17828 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17829 rtx insn, int distance,
17830 rtx start, bool *found)
17832 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17833 rtx prev = start;
17834 rtx next = NULL;
17836 *found = false;
17838 while (prev
17839 && prev != insn
17840 && distance < LEA_SEARCH_THRESHOLD)
17842 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17844 distance = increase_distance (prev, next, distance);
17845 if (insn_defines_reg (regno1, regno2, prev))
17847 if (recog_memoized (prev) < 0
17848 || get_attr_type (prev) != TYPE_LEA)
17850 *found = true;
17851 return distance;
17855 next = prev;
17857 if (prev == BB_HEAD (bb))
17858 break;
17860 prev = PREV_INSN (prev);
17863 return distance;
17866 /* Search backward for non-agu definition of register number REGNO1
17867 or register number REGNO2 in INSN's basic block until
17868 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17869 2. Reach neighbour BBs boundary, or
17870 3. Reach agu definition.
17871 Returns the distance between the non-agu definition point and INSN.
17872 If no definition point, returns -1. */
17874 static int
17875 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17876 rtx insn)
17878 basic_block bb = BLOCK_FOR_INSN (insn);
17879 int distance = 0;
17880 bool found = false;
17882 if (insn != BB_HEAD (bb))
17883 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17884 distance, PREV_INSN (insn),
17885 &found);
17887 if (!found && distance < LEA_SEARCH_THRESHOLD)
17889 edge e;
17890 edge_iterator ei;
17891 bool simple_loop = false;
17893 FOR_EACH_EDGE (e, ei, bb->preds)
17894 if (e->src == bb)
17896 simple_loop = true;
17897 break;
17900 if (simple_loop)
17901 distance = distance_non_agu_define_in_bb (regno1, regno2,
17902 insn, distance,
17903 BB_END (bb), &found);
17904 else
17906 int shortest_dist = -1;
17907 bool found_in_bb = false;
17909 FOR_EACH_EDGE (e, ei, bb->preds)
17911 int bb_dist
17912 = distance_non_agu_define_in_bb (regno1, regno2,
17913 insn, distance,
17914 BB_END (e->src),
17915 &found_in_bb);
17916 if (found_in_bb)
17918 if (shortest_dist < 0)
17919 shortest_dist = bb_dist;
17920 else if (bb_dist > 0)
17921 shortest_dist = MIN (bb_dist, shortest_dist);
17923 found = true;
17927 distance = shortest_dist;
17931 /* get_attr_type may modify recog data. We want to make sure
17932 that recog data is valid for instruction INSN, on which
17933 distance_non_agu_define is called. INSN is unchanged here. */
17934 extract_insn_cached (insn);
17936 if (!found)
17937 return -1;
17939 return distance >> 1;
17942 /* Return the distance in half-cycles between INSN and the next
17943 insn that uses register number REGNO in a memory address, added
17944 to DISTANCE.  Return -1 if REGNO is set.
17946 Put true value into *FOUND if register usage was found and
17947 false otherwise.
17948 Put true value into *REDEFINED if register redefinition was
17949 found and false otherwise. */
17951 static int
17952 distance_agu_use_in_bb (unsigned int regno,
17953 rtx insn, int distance, rtx start,
17954 bool *found, bool *redefined)
17956 basic_block bb = NULL;
17957 rtx next = start;
17958 rtx prev = NULL;
17960 *found = false;
17961 *redefined = false;
17963 if (start != NULL_RTX)
17965 bb = BLOCK_FOR_INSN (start);
17966 if (start != BB_HEAD (bb))
17967 /* If insn and start belong to the same bb, set prev to insn,
17968 so the call to increase_distance will increase the distance
17969 between insns by 1. */
17970 prev = insn;
17973 while (next
17974 && next != insn
17975 && distance < LEA_SEARCH_THRESHOLD)
17977 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17979 distance = increase_distance(prev, next, distance);
17980 if (insn_uses_reg_mem (regno, next))
17982 /* Return DISTANCE if OP0 is used in memory
17983 address in NEXT. */
17984 *found = true;
17985 return distance;
17988 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17990 /* Return -1 if OP0 is set in NEXT. */
17991 *redefined = true;
17992 return -1;
17995 prev = next;
17998 if (next == BB_END (bb))
17999 break;
18001 next = NEXT_INSN (next);
18004 return distance;
18007 /* Return the distance between INSN and the next insn that uses
18008 register number REGNO0 in memory address. Return -1 if no such
18009 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18011 static int
18012 distance_agu_use (unsigned int regno0, rtx insn)
18014 basic_block bb = BLOCK_FOR_INSN (insn);
18015 int distance = 0;
18016 bool found = false;
18017 bool redefined = false;
18019 if (insn != BB_END (bb))
18020 distance = distance_agu_use_in_bb (regno0, insn, distance,
18021 NEXT_INSN (insn),
18022 &found, &redefined);
18024 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18026 edge e;
18027 edge_iterator ei;
18028 bool simple_loop = false;
18030 FOR_EACH_EDGE (e, ei, bb->succs)
18031 if (e->dest == bb)
18033 simple_loop = true;
18034 break;
18037 if (simple_loop)
18038 distance = distance_agu_use_in_bb (regno0, insn,
18039 distance, BB_HEAD (bb),
18040 &found, &redefined);
18041 else
18043 int shortest_dist = -1;
18044 bool found_in_bb = false;
18045 bool redefined_in_bb = false;
18047 FOR_EACH_EDGE (e, ei, bb->succs)
18049 int bb_dist
18050 = distance_agu_use_in_bb (regno0, insn,
18051 distance, BB_HEAD (e->dest),
18052 &found_in_bb, &redefined_in_bb);
18053 if (found_in_bb)
18055 if (shortest_dist < 0)
18056 shortest_dist = bb_dist;
18057 else if (bb_dist > 0)
18058 shortest_dist = MIN (bb_dist, shortest_dist);
18060 found = true;
18064 distance = shortest_dist;
18068 if (!found || redefined)
18069 return -1;
18071 return distance >> 1;
18074 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18075 there is a dilemma of choosing between LEA and ADD.
18076 Negative value: ADD is preferred over LEA
18077 Zero: Neutral
18078 Positive value: LEA is preferred over ADD.  */
18079 #define IX86_LEA_PRIORITY 0
18081 /* Return true if usage of lea INSN has performance advantage
18082 over a sequence of instructions. Instructions sequence has
18083 SPLIT_COST cycles higher latency than lea latency. */
18085 static bool
18086 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18087 unsigned int regno2, int split_cost, bool has_scale)
18089 int dist_define, dist_use;
18091 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18092 non-destructive destination, or because the ability to use a
18093 SCALE factor is wanted, the use of LEA is justified.  */
18094 if (TARGET_SILVERMONT || TARGET_INTEL)
18096 if (has_scale)
18097 return true;
18098 if (split_cost < 1)
18099 return false;
18100 if (regno0 == regno1 || regno0 == regno2)
18101 return false;
18102 return true;
18105 dist_define = distance_non_agu_define (regno1, regno2, insn);
18106 dist_use = distance_agu_use (regno0, insn);
18108 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18110 /* If there is no non-AGU operand definition, no AGU
18111 operand usage and the split cost is 0, then both the lea
18112 and non-lea variants have the same priority.  Currently
18113 we prefer lea for 64-bit code and non-lea for 32-bit
18114 code.  */
18115 if (dist_use < 0 && split_cost == 0)
18116 return TARGET_64BIT || IX86_LEA_PRIORITY;
18117 else
18118 return true;
18121 /* With a longer definition distance, lea is more preferable.
18122 Here we adjust it to take the splitting cost and
18123 lea priority into account.  */
18124 dist_define += split_cost + IX86_LEA_PRIORITY;
18126 /* If there is no use in a memory address then we just check
18127 that the split cost exceeds the AGU stall.  */
18128 if (dist_use < 0)
18129 return dist_define > LEA_MAX_STALL;
18131 /* If this insn has both a backward non-AGU dependence and a forward
18132 AGU dependence, the one with the shorter distance takes effect.  */
18133 return dist_define >= dist_use;
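/* Illustration (not from the original comments): dist_define measures
   how far back the lea inputs were last written by a non-AGU
   instruction and dist_use how soon the lea result is needed in an
   address.  E.g. with dist_define == 2, split_cost == 1 and
   dist_use == 3 the test is 2 + 1 >= 3, so the lea form is kept; had
   the inputs been defined more recently (a smaller dist_define), the
   split sequence would win.  */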
18136 /* Return true if it is legal to clobber flags by INSN and
18137 false otherwise. */
18139 static bool
18140 ix86_ok_to_clobber_flags (rtx insn)
18142 basic_block bb = BLOCK_FOR_INSN (insn);
18143 df_ref *use;
18144 bitmap live;
18146 while (insn)
18148 if (NONDEBUG_INSN_P (insn))
18150 for (use = DF_INSN_USES (insn); *use; use++)
18151 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18152 return false;
18154 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18155 return true;
18158 if (insn == BB_END (bb))
18159 break;
18161 insn = NEXT_INSN (insn);
18164 live = df_get_live_out(bb);
18165 return !REGNO_REG_SET_P (live, FLAGS_REG);
18168 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18169 move and add to avoid AGU stalls. */
18171 bool
18172 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18174 unsigned int regno0, regno1, regno2;
18176 /* Check if we need to optimize. */
18177 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18178 return false;
18180 /* Check it is correct to split here. */
18181 if (!ix86_ok_to_clobber_flags(insn))
18182 return false;
18184 regno0 = true_regnum (operands[0]);
18185 regno1 = true_regnum (operands[1]);
18186 regno2 = true_regnum (operands[2]);
18188 /* We need to split only adds with a non-destructive
18189 destination operand.  */
18190 if (regno0 == regno1 || regno0 == regno2)
18191 return false;
18192 else
18193 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
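/* Example of the split this enables (illustrative): a three-operand
   "leal (%ebx,%ecx), %eax" can be replaced by

	movl	%ebx, %eax
	addl	%ecx, %eax

   trading one extra mov (the split_cost of 1 passed above) for avoiding
   the AGU stall of the lea.  */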
18196 /* Return true if we should emit lea instruction instead of mov
18197 instruction. */
18199 bool
18200 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18202 unsigned int regno0, regno1;
18204 /* Check if we need to optimize. */
18205 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18206 return false;
18208 /* Use lea for reg to reg moves only. */
18209 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18210 return false;
18212 regno0 = true_regnum (operands[0]);
18213 regno1 = true_regnum (operands[1]);
18215 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18218 /* Return true if we need to split lea into a sequence of
18219 instructions to avoid AGU stalls. */
18221 bool
18222 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18224 unsigned int regno0, regno1, regno2;
18225 int split_cost;
18226 struct ix86_address parts;
18227 int ok;
18229 /* Check we need to optimize. */
18230 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18231 return false;
18233 /* The "at least two components" test below might not catch simple
18234 move or zero extension insns if parts.base is non-NULL and parts.disp
18235 is const0_rtx as the only components in the address, e.g. if the
18236 register is %rbp or %r13. As this test is much cheaper and moves or
18237 zero extensions are the common case, do this check first. */
18238 if (REG_P (operands[1])
18239 || (SImode_address_operand (operands[1], VOIDmode)
18240 && REG_P (XEXP (operands[1], 0))))
18241 return false;
18243 /* Check if it is OK to split here. */
18244 if (!ix86_ok_to_clobber_flags (insn))
18245 return false;
18247 ok = ix86_decompose_address (operands[1], &parts);
18248 gcc_assert (ok);
18250 /* There should be at least two components in the address. */
18251 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18252 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18253 return false;
18255 /* We should not split into add if a non-legitimate PIC
18256 operand is used as the displacement.  */
18257 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18258 return false;
18260 regno0 = true_regnum (operands[0]) ;
18261 regno1 = INVALID_REGNUM;
18262 regno2 = INVALID_REGNUM;
18264 if (parts.base)
18265 regno1 = true_regnum (parts.base);
18266 if (parts.index)
18267 regno2 = true_regnum (parts.index);
18269 split_cost = 0;
18271 /* Compute how many cycles we will add to the execution time
18272 if we split the lea into a sequence of instructions.  */
18273 if (parts.base || parts.index)
18275 /* Have to use a mov instruction if the non-destructive
18276 destination form is used.  */
18277 if (regno1 != regno0 && regno2 != regno0)
18278 split_cost += 1;
18280 /* Have to add index to base if both exist. */
18281 if (parts.base && parts.index)
18282 split_cost += 1;
18284 /* Have to use shift and adds if scale is 2 or greater. */
18285 if (parts.scale > 1)
18287 if (regno0 != regno1)
18288 split_cost += 1;
18289 else if (regno2 == regno0)
18290 split_cost += 4;
18291 else
18292 split_cost += parts.scale;
18295 /* Have to use add instruction with immediate if
18296 disp is non zero. */
18297 if (parts.disp && parts.disp != const0_rtx)
18298 split_cost += 1;
18300 /* Subtract the price of lea. */
18301 split_cost -= 1;
18304 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18305 parts.scale > 1);
18308 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18309 matches destination. RTX includes clobber of FLAGS_REG. */
18311 static void
18312 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18313 rtx dst, rtx src)
18315 rtx op, clob;
18317 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18318 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18320 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18323 /* Return true if regno1 def is nearest to the insn. */
18325 static bool
18326 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18328 rtx prev = insn;
18329 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18331 if (insn == start)
18332 return false;
18333 while (prev && prev != start)
18335 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18337 prev = PREV_INSN (prev);
18338 continue;
18340 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18341 return true;
18342 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18343 return false;
18344 prev = PREV_INSN (prev);
18347 /* None of the regs is defined in the bb. */
18348 return false;
18351 /* Split lea instructions into a sequence of instructions
18352 which are executed on the ALU to avoid AGU stalls.
18353 It is assumed that it is allowed to clobber the flags register
18354 at the lea position.  */
18356 void
18357 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18359 unsigned int regno0, regno1, regno2;
18360 struct ix86_address parts;
18361 rtx target, tmp;
18362 int ok, adds;
18364 ok = ix86_decompose_address (operands[1], &parts);
18365 gcc_assert (ok);
18367 target = gen_lowpart (mode, operands[0]);
18369 regno0 = true_regnum (target);
18370 regno1 = INVALID_REGNUM;
18371 regno2 = INVALID_REGNUM;
18373 if (parts.base)
18375 parts.base = gen_lowpart (mode, parts.base);
18376 regno1 = true_regnum (parts.base);
18379 if (parts.index)
18381 parts.index = gen_lowpart (mode, parts.index);
18382 regno2 = true_regnum (parts.index);
18385 if (parts.disp)
18386 parts.disp = gen_lowpart (mode, parts.disp);
18388 if (parts.scale > 1)
18390 /* Case r1 = r1 + ... */
18391 if (regno1 == regno0)
18393 /* If we have the case r1 = r1 + C * r2 then we
18394 would have to use multiplication, which is very
18395 expensive.  Assume the cost model is wrong if we
18396 reach such a case here.  */
18397 gcc_assert (regno2 != regno0);
18399 for (adds = parts.scale; adds > 0; adds--)
18400 ix86_emit_binop (PLUS, mode, target, parts.index);
18402 else
18404 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18405 if (regno0 != regno2)
18406 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18408 /* Use shift for scaling. */
18409 ix86_emit_binop (ASHIFT, mode, target,
18410 GEN_INT (exact_log2 (parts.scale)));
18412 if (parts.base)
18413 ix86_emit_binop (PLUS, mode, target, parts.base);
18415 if (parts.disp && parts.disp != const0_rtx)
18416 ix86_emit_binop (PLUS, mode, target, parts.disp);
18419 else if (!parts.base && !parts.index)
18421 gcc_assert(parts.disp);
18422 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18424 else
18426 if (!parts.base)
18428 if (regno0 != regno2)
18429 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18431 else if (!parts.index)
18433 if (regno0 != regno1)
18434 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18436 else
18438 if (regno0 == regno1)
18439 tmp = parts.index;
18440 else if (regno0 == regno2)
18441 tmp = parts.base;
18442 else
18444 rtx tmp1;
18446 /* Find better operand for SET instruction, depending
18447 on which definition is farther from the insn. */
18448 if (find_nearest_reg_def (insn, regno1, regno2))
18449 tmp = parts.index, tmp1 = parts.base;
18450 else
18451 tmp = parts.base, tmp1 = parts.index;
18453 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18455 if (parts.disp && parts.disp != const0_rtx)
18456 ix86_emit_binop (PLUS, mode, target, parts.disp);
18458 ix86_emit_binop (PLUS, mode, target, tmp1);
18459 return;
18462 ix86_emit_binop (PLUS, mode, target, tmp);
18465 if (parts.disp && parts.disp != const0_rtx)
18466 ix86_emit_binop (PLUS, mode, target, parts.disp);
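/* Example of a full split (illustrative): "leal 16(%ebx,%ecx,4), %eax",
   with the destination distinct from both inputs, becomes roughly

	movl	%ecx, %eax	# move the index into the destination
	sall	$2, %eax	# a shift replaces the scale
	addl	%ebx, %eax	# add the base
	addl	$16, %eax	# add the displacement

   which corresponds to the scale > 1, regno1 != regno0 path above.  */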
18470 /* Return true if it is OK to optimize an ADD operation to an LEA
18471 operation to avoid flag register consumption.  For most processors,
18472 ADD is faster than LEA.  For processors like BONNELL, if the
18473 destination register of the LEA holds an actual address which will be
18474 used soon, LEA is better; otherwise ADD is better.  */
18476 bool
18477 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18479 unsigned int regno0 = true_regnum (operands[0]);
18480 unsigned int regno1 = true_regnum (operands[1]);
18481 unsigned int regno2 = true_regnum (operands[2]);
18483 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18484 if (regno0 != regno1 && regno0 != regno2)
18485 return true;
18487 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18488 return false;
18490 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18493 /* Return true if destination reg of SET_BODY is shift count of
18494 USE_BODY. */
18496 static bool
18497 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18499 rtx set_dest;
18500 rtx shift_rtx;
18501 int i;
18503 /* Retrieve destination of SET_BODY. */
18504 switch (GET_CODE (set_body))
18506 case SET:
18507 set_dest = SET_DEST (set_body);
18508 if (!set_dest || !REG_P (set_dest))
18509 return false;
18510 break;
18511 case PARALLEL:
18512 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18513 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18514 use_body))
18515 return true;
18516 default:
18517 return false;
18518 break;
18521 /* Retrieve shift count of USE_BODY. */
18522 switch (GET_CODE (use_body))
18524 case SET:
18525 shift_rtx = XEXP (use_body, 1);
18526 break;
18527 case PARALLEL:
18528 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18529 if (ix86_dep_by_shift_count_body (set_body,
18530 XVECEXP (use_body, 0, i)))
18531 return true;
18532 default:
18533 return false;
18534 break;
18537 if (shift_rtx
18538 && (GET_CODE (shift_rtx) == ASHIFT
18539 || GET_CODE (shift_rtx) == LSHIFTRT
18540 || GET_CODE (shift_rtx) == ASHIFTRT
18541 || GET_CODE (shift_rtx) == ROTATE
18542 || GET_CODE (shift_rtx) == ROTATERT))
18544 rtx shift_count = XEXP (shift_rtx, 1);
18546 /* Return true if shift count is dest of SET_BODY. */
18547 if (REG_P (shift_count))
18549 /* Add this check since it can be invoked before register
18550 allocation in the pre-reload scheduler.  */
18551 if (reload_completed
18552 && true_regnum (set_dest) == true_regnum (shift_count))
18553 return true;
18554 else if (REGNO(set_dest) == REGNO(shift_count))
18555 return true;
18559 return false;
18562 /* Return true if destination reg of SET_INSN is shift count of
18563 USE_INSN. */
18565 bool
18566 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18568 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18569 PATTERN (use_insn));
18572 /* Return TRUE or FALSE depending on whether the unary operator meets the
18573 appropriate constraints. */
18575 bool
18576 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18577 enum machine_mode mode ATTRIBUTE_UNUSED,
18578 rtx operands[2])
18580 /* If one of operands is memory, source and destination must match. */
18581 if ((MEM_P (operands[0])
18582 || MEM_P (operands[1]))
18583 && ! rtx_equal_p (operands[0], operands[1]))
18584 return false;
18585 return true;
18588 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18589 are ok, keeping in mind the possible movddup alternative. */
18591 bool
18592 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18594 if (MEM_P (operands[0]))
18595 return rtx_equal_p (operands[0], operands[1 + high]);
18596 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18597 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18598 return true;
18601 /* Post-reload splitter for converting an SF or DFmode value in an
18602 SSE register into an unsigned SImode. */
18604 void
18605 ix86_split_convert_uns_si_sse (rtx operands[])
18607 enum machine_mode vecmode;
18608 rtx value, large, zero_or_two31, input, two31, x;
18610 large = operands[1];
18611 zero_or_two31 = operands[2];
18612 input = operands[3];
18613 two31 = operands[4];
18614 vecmode = GET_MODE (large);
18615 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18617 /* Load up the value into the low element. We must ensure that the other
18618 elements are valid floats -- zero is the easiest such value. */
18619 if (MEM_P (input))
18621 if (vecmode == V4SFmode)
18622 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18623 else
18624 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18626 else
18628 input = gen_rtx_REG (vecmode, REGNO (input));
18629 emit_move_insn (value, CONST0_RTX (vecmode));
18630 if (vecmode == V4SFmode)
18631 emit_insn (gen_sse_movss (value, value, input));
18632 else
18633 emit_insn (gen_sse2_movsd (value, value, input));
18636 emit_move_insn (large, two31);
18637 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18639 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18640 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18642 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18643 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18645 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18646 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18648 large = gen_rtx_REG (V4SImode, REGNO (large));
18649 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18651 x = gen_rtx_REG (V4SImode, REGNO (value));
18652 if (vecmode == V4SFmode)
18653 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18654 else
18655 emit_insn (gen_sse2_cvttpd2dq (x, value));
18656 value = x;
18658 emit_insn (gen_xorv4si3 (value, value, large));
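/* Summary of the trick above (illustration, not from the original
   comments): values below 2**31 convert directly; for values >= 2**31
   the code subtracts 2**31.0 before the signed cvtt conversion and then
   xors the result with 0x80000000 (the comparison mask shifted left by
   31) to restore the top bit, giving the correct unsigned SImode
   result.  */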
18661 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18662 Expects the 64-bit DImode to be supplied in a pair of integral
18663 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18664 -mfpmath=sse, !optimize_size only. */
18666 void
18667 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18669 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18670 rtx int_xmm, fp_xmm;
18671 rtx biases, exponents;
18672 rtx x;
18674 int_xmm = gen_reg_rtx (V4SImode);
18675 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18676 emit_insn (gen_movdi_to_sse (int_xmm, input));
18677 else if (TARGET_SSE_SPLIT_REGS)
18679 emit_clobber (int_xmm);
18680 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18682 else
18684 x = gen_reg_rtx (V2DImode);
18685 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18686 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18689 x = gen_rtx_CONST_VECTOR (V4SImode,
18690 gen_rtvec (4, GEN_INT (0x43300000UL),
18691 GEN_INT (0x45300000UL),
18692 const0_rtx, const0_rtx));
18693 exponents = validize_mem (force_const_mem (V4SImode, x));
18695 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18696 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18698 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18699 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18700 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18701 (0x1.0p84 + double(fp_value_hi_xmm)).
18702 Note these exponents differ by 32. */
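/* Worked example of the bias trick, assuming the input DImode value is
   0x0000000100000003 (lo = 3, hi = 1): the low half becomes the double
   0x1.0p52 + 3 and the high half 0x1.0p84 + 0x1.0p32.  Subtracting the
   0x1.0p52 and 0x1.0p84 biases below leaves 3.0 and 4294967296.0, whose
   sum 4294967299.0 is exactly the original unsigned value. */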
18704 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18706 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18707 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18708 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18709 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18710 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18711 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18712 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18713 biases = validize_mem (force_const_mem (V2DFmode, biases));
18714 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18716 /* Add the upper and lower DFmode values together. */
18717 if (TARGET_SSE3)
18718 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18719 else
18721 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18722 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18723 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18726 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18729 /* Not used, but eases macroization of patterns. */
18730 void
18731 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18732 rtx input ATTRIBUTE_UNUSED)
18734 gcc_unreachable ();
18737 /* Convert an unsigned SImode value into a DFmode. Only currently used
18738 for SSE, but applicable anywhere. */
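/* In scalar terms, the trick used below: adding INT_MIN flips the sign
   bit, so the value can be converted as a signed SImode integer and the
   0x1.0p31 bias added back in DFmode, i.e.
     (double) (int) (u + 0x80000000u) + 2147483648.0 == (double) u
   for any 32-bit unsigned U; e.g. u = 0xfffffffe gives
   2147483646.0 + 2147483648.0 = 4294967294.0. */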
18740 void
18741 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18743 REAL_VALUE_TYPE TWO31r;
18744 rtx x, fp;
18746 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18747 NULL, 1, OPTAB_DIRECT);
18749 fp = gen_reg_rtx (DFmode);
18750 emit_insn (gen_floatsidf2 (fp, x));
18752 real_ldexp (&TWO31r, &dconst1, 31);
18753 x = const_double_from_real_value (TWO31r, DFmode);
18755 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18756 if (x != target)
18757 emit_move_insn (target, x);
18760 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18761 32-bit mode; otherwise we have a direct convert instruction. */
18763 void
18764 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18766 REAL_VALUE_TYPE TWO32r;
18767 rtx fp_lo, fp_hi, x;
18769 fp_lo = gen_reg_rtx (DFmode);
18770 fp_hi = gen_reg_rtx (DFmode);
18772 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18774 real_ldexp (&TWO32r, &dconst1, 32);
18775 x = const_double_from_real_value (TWO32r, DFmode);
18776 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18778 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18780 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18781 0, OPTAB_DIRECT);
18782 if (x != target)
18783 emit_move_insn (target, x);
18786 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18787 For x86_32, -mfpmath=sse, !optimize_size only. */
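/* Scalar sketch of the conversion done below (illustrative only, not part
   of the compiler; assumes IEEE single precision and a 32-bit unsigned
   int).  Both 16-bit halves are exactly representable in SFmode and the
   final addition is the only rounding step, so the result is the
   correctly rounded (float) U. */
static float
convert_uns_si_to_sf_sketch (unsigned int u)
{
  float fp_hi = (float) (int) (u >> 16);	/* Exact. */
  float fp_lo = (float) (int) (u & 0xffff);	/* Exact. */
  return fp_hi * 65536.0f + fp_lo;		/* Product exact; one rounding. */
}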
18788 void
18789 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18791 REAL_VALUE_TYPE ONE16r;
18792 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18794 real_ldexp (&ONE16r, &dconst1, 16);
18795 x = const_double_from_real_value (ONE16r, SFmode);
18796 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18797 NULL, 0, OPTAB_DIRECT);
18798 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18799 NULL, 0, OPTAB_DIRECT);
18800 fp_hi = gen_reg_rtx (SFmode);
18801 fp_lo = gen_reg_rtx (SFmode);
18802 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18803 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18804 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18805 0, OPTAB_DIRECT);
18806 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18807 0, OPTAB_DIRECT);
18808 if (!rtx_equal_p (target, fp_hi))
18809 emit_move_insn (target, fp_hi);
18812 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18813 a vector of unsigned ints VAL to vector of floats TARGET. */
18815 void
18816 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18818 rtx tmp[8];
18819 REAL_VALUE_TYPE TWO16r;
18820 enum machine_mode intmode = GET_MODE (val);
18821 enum machine_mode fltmode = GET_MODE (target);
18822 rtx (*cvt) (rtx, rtx);
18824 if (intmode == V4SImode)
18825 cvt = gen_floatv4siv4sf2;
18826 else
18827 cvt = gen_floatv8siv8sf2;
18828 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18829 tmp[0] = force_reg (intmode, tmp[0]);
18830 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18831 OPTAB_DIRECT);
18832 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18833 NULL_RTX, 1, OPTAB_DIRECT);
18834 tmp[3] = gen_reg_rtx (fltmode);
18835 emit_insn (cvt (tmp[3], tmp[1]));
18836 tmp[4] = gen_reg_rtx (fltmode);
18837 emit_insn (cvt (tmp[4], tmp[2]));
18838 real_ldexp (&TWO16r, &dconst1, 16);
18839 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18840 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18841 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18842 OPTAB_DIRECT);
18843 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18844 OPTAB_DIRECT);
18845 if (tmp[7] != target)
18846 emit_move_insn (target, tmp[7]);
18849 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18850 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18851 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18852 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18854 rtx
18855 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18857 REAL_VALUE_TYPE TWO31r;
18858 rtx two31r, tmp[4];
18859 enum machine_mode mode = GET_MODE (val);
18860 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18861 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18862 rtx (*cmp) (rtx, rtx, rtx, rtx);
18863 int i;
18865 for (i = 0; i < 3; i++)
18866 tmp[i] = gen_reg_rtx (mode);
18867 real_ldexp (&TWO31r, &dconst1, 31);
18868 two31r = const_double_from_real_value (TWO31r, scalarmode);
18869 two31r = ix86_build_const_vector (mode, 1, two31r);
18870 two31r = force_reg (mode, two31r);
18871 switch (mode)
18873 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18874 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18875 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18876 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18877 default: gcc_unreachable ();
18879 tmp[3] = gen_rtx_LE (mode, two31r, val);
18880 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18881 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18882 0, OPTAB_DIRECT);
18883 if (intmode == V4SImode || TARGET_AVX2)
18884 *xorp = expand_simple_binop (intmode, ASHIFT,
18885 gen_lowpart (intmode, tmp[0]),
18886 GEN_INT (31), NULL_RTX, 0,
18887 OPTAB_DIRECT);
18888 else
18890 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18891 two31 = ix86_build_const_vector (intmode, 1, two31);
18892 *xorp = expand_simple_binop (intmode, AND,
18893 gen_lowpart (intmode, tmp[0]),
18894 two31, NULL_RTX, 0,
18895 OPTAB_DIRECT);
18897 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18898 0, OPTAB_DIRECT);
18901 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18902 then replicate the value for all elements of the vector
18903 register. */
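/* For example, with MODE == V4SImode and VECT true the result is the
   CONST_VECTOR { VALUE, VALUE, VALUE, VALUE }; with VECT false only
   element 0 is VALUE and the remaining elements are CONST0_RTX of the
   inner mode. */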
18905 rtx
18906 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18908 int i, n_elt;
18909 rtvec v;
18910 enum machine_mode scalar_mode;
18912 switch (mode)
18914 case V64QImode:
18915 case V32QImode:
18916 case V16QImode:
18917 case V32HImode:
18918 case V16HImode:
18919 case V8HImode:
18920 case V16SImode:
18921 case V8SImode:
18922 case V4SImode:
18923 case V8DImode:
18924 case V4DImode:
18925 case V2DImode:
18926 gcc_assert (vect);
18927 case V16SFmode:
18928 case V8SFmode:
18929 case V4SFmode:
18930 case V8DFmode:
18931 case V4DFmode:
18932 case V2DFmode:
18933 n_elt = GET_MODE_NUNITS (mode);
18934 v = rtvec_alloc (n_elt);
18935 scalar_mode = GET_MODE_INNER (mode);
18937 RTVEC_ELT (v, 0) = value;
18939 for (i = 1; i < n_elt; ++i)
18940 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18942 return gen_rtx_CONST_VECTOR (mode, v);
18944 default:
18945 gcc_unreachable ();
18949 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18950 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18951 for an SSE register. If VECT is true, then replicate the mask for
18952 all elements of the vector register. If INVERT is true, then create
18953 a mask excluding the sign bit. */
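/* E.g. for V2DFmode with VECT true this yields { -0.0, -0.0 } viewed as
   a bit pattern (only the sign bit set in each element), suitable for
   andpd/xorpd; with INVERT the per-element pattern is instead
   0x7fffffffffffffff. */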
18955 rtx
18956 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18958 enum machine_mode vec_mode, imode;
18959 HOST_WIDE_INT hi, lo;
18960 int shift = 63;
18961 rtx v;
18962 rtx mask;
18964 /* Find the sign bit, sign extended to 2*HWI. */
18965 switch (mode)
18967 case V16SImode:
18968 case V16SFmode:
18969 case V8SImode:
18970 case V4SImode:
18971 case V8SFmode:
18972 case V4SFmode:
18973 vec_mode = mode;
18974 mode = GET_MODE_INNER (mode);
18975 imode = SImode;
18976 lo = 0x80000000, hi = lo < 0;
18977 break;
18979 case V8DImode:
18980 case V4DImode:
18981 case V2DImode:
18982 case V8DFmode:
18983 case V4DFmode:
18984 case V2DFmode:
18985 vec_mode = mode;
18986 mode = GET_MODE_INNER (mode);
18987 imode = DImode;
18988 if (HOST_BITS_PER_WIDE_INT >= 64)
18989 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18990 else
18991 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18992 break;
18994 case TImode:
18995 case TFmode:
18996 vec_mode = VOIDmode;
18997 if (HOST_BITS_PER_WIDE_INT >= 64)
18999 imode = TImode;
19000 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19002 else
19004 rtvec vec;
19006 imode = DImode;
19007 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19009 if (invert)
19011 lo = ~lo, hi = ~hi;
19012 v = constm1_rtx;
19014 else
19015 v = const0_rtx;
19017 mask = immed_double_const (lo, hi, imode);
19019 vec = gen_rtvec (2, v, mask);
19020 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19021 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19023 return v;
19025 break;
19027 default:
19028 gcc_unreachable ();
19031 if (invert)
19032 lo = ~lo, hi = ~hi;
19034 /* Force this value into the low part of a fp vector constant. */
19035 mask = immed_double_const (lo, hi, imode);
19036 mask = gen_lowpart (mode, mask);
19038 if (vec_mode == VOIDmode)
19039 return force_reg (mode, mask);
19041 v = ix86_build_const_vector (vec_mode, vect, mask);
19042 return force_reg (vec_mode, v);
19045 /* Generate code for floating point ABS or NEG. */
19047 void
19048 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19049 rtx operands[])
19051 rtx mask, set, dst, src;
19052 bool use_sse = false;
19053 bool vector_mode = VECTOR_MODE_P (mode);
19054 enum machine_mode vmode = mode;
19056 if (vector_mode)
19057 use_sse = true;
19058 else if (mode == TFmode)
19059 use_sse = true;
19060 else if (TARGET_SSE_MATH)
19062 use_sse = SSE_FLOAT_MODE_P (mode);
19063 if (mode == SFmode)
19064 vmode = V4SFmode;
19065 else if (mode == DFmode)
19066 vmode = V2DFmode;
19069 /* NEG and ABS performed with SSE use bitwise mask operations.
19070 Create the appropriate mask now. */
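/* I.e. NEG becomes an XOR with the sign-bit mask and ABS an AND with the
   inverted mask (e.g. xorpd/andpd for DFmode); the USE of the mask in the
   PARALLEL below records it as an input until the pattern is split into
   that logical operation. */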
19071 if (use_sse)
19072 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19073 else
19074 mask = NULL_RTX;
19076 dst = operands[0];
19077 src = operands[1];
19079 set = gen_rtx_fmt_e (code, mode, src);
19080 set = gen_rtx_SET (VOIDmode, dst, set);
19082 if (mask)
19084 rtx use, clob;
19085 rtvec par;
19087 use = gen_rtx_USE (VOIDmode, mask);
19088 if (vector_mode)
19089 par = gen_rtvec (2, set, use);
19090 else
19092 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19093 par = gen_rtvec (3, set, use, clob);
19095 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19097 else
19098 emit_insn (set);
19101 /* Expand a copysign operation. Special case operand 0 being a constant. */
19103 void
19104 ix86_expand_copysign (rtx operands[])
19106 enum machine_mode mode, vmode;
19107 rtx dest, op0, op1, mask, nmask;
19109 dest = operands[0];
19110 op0 = operands[1];
19111 op1 = operands[2];
19113 mode = GET_MODE (dest);
19115 if (mode == SFmode)
19116 vmode = V4SFmode;
19117 else if (mode == DFmode)
19118 vmode = V2DFmode;
19119 else
19120 vmode = mode;
19122 if (GET_CODE (op0) == CONST_DOUBLE)
19124 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19126 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19127 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19129 if (mode == SFmode || mode == DFmode)
19131 if (op0 == CONST0_RTX (mode))
19132 op0 = CONST0_RTX (vmode);
19133 else
19135 rtx v = ix86_build_const_vector (vmode, false, op0);
19137 op0 = force_reg (vmode, v);
19140 else if (op0 != CONST0_RTX (mode))
19141 op0 = force_reg (mode, op0);
19143 mask = ix86_build_signbit_mask (vmode, 0, 0);
19145 if (mode == SFmode)
19146 copysign_insn = gen_copysignsf3_const;
19147 else if (mode == DFmode)
19148 copysign_insn = gen_copysigndf3_const;
19149 else
19150 copysign_insn = gen_copysigntf3_const;
19152 emit_insn (copysign_insn (dest, op0, op1, mask));
19154 else
19156 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19158 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19159 mask = ix86_build_signbit_mask (vmode, 0, 0);
19161 if (mode == SFmode)
19162 copysign_insn = gen_copysignsf3_var;
19163 else if (mode == DFmode)
19164 copysign_insn = gen_copysigndf3_var;
19165 else
19166 copysign_insn = gen_copysigntf3_var;
19168 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19172 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19173 be a constant, and so has already been expanded into a vector constant. */
19175 void
19176 ix86_split_copysign_const (rtx operands[])
19178 enum machine_mode mode, vmode;
19179 rtx dest, op0, mask, x;
19181 dest = operands[0];
19182 op0 = operands[1];
19183 mask = operands[3];
19185 mode = GET_MODE (dest);
19186 vmode = GET_MODE (mask);
19188 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19189 x = gen_rtx_AND (vmode, dest, mask);
19190 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19192 if (op0 != CONST0_RTX (vmode))
19194 x = gen_rtx_IOR (vmode, dest, op0);
19195 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19199 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19200 so we have to do two masks. */
19202 void
19203 ix86_split_copysign_var (rtx operands[])
19205 enum machine_mode mode, vmode;
19206 rtx dest, scratch, op0, op1, mask, nmask, x;
19208 dest = operands[0];
19209 scratch = operands[1];
19210 op0 = operands[2];
19211 op1 = operands[3];
19212 nmask = operands[4];
19213 mask = operands[5];
19215 mode = GET_MODE (dest);
19216 vmode = GET_MODE (mask);
19218 if (rtx_equal_p (op0, op1))
19220 /* Shouldn't happen often (it's useless, obviously), but when it does
19221 we'd generate incorrect code if we continue below. */
19222 emit_move_insn (dest, op0);
19223 return;
19226 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19228 gcc_assert (REGNO (op1) == REGNO (scratch));
19230 x = gen_rtx_AND (vmode, scratch, mask);
19231 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19233 dest = mask;
19234 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19235 x = gen_rtx_NOT (vmode, dest);
19236 x = gen_rtx_AND (vmode, x, op0);
19237 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19239 else
19241 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19243 x = gen_rtx_AND (vmode, scratch, mask);
19245 else /* alternative 2,4 */
19247 gcc_assert (REGNO (mask) == REGNO (scratch));
19248 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19249 x = gen_rtx_AND (vmode, scratch, op1);
19251 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19253 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19255 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19256 x = gen_rtx_AND (vmode, dest, nmask);
19258 else /* alternative 3,4 */
19260 gcc_assert (REGNO (nmask) == REGNO (dest));
19261 dest = nmask;
19262 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19263 x = gen_rtx_AND (vmode, dest, op0);
19265 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19268 x = gen_rtx_IOR (vmode, dest, scratch);
19269 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19272 /* Return TRUE or FALSE depending on whether the first SET in INSN
19273 has source and destination with matching CC modes, and that the
19274 CC mode is at least as constrained as REQ_MODE. */
19276 bool
19277 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19279 rtx set;
19280 enum machine_mode set_mode;
19282 set = PATTERN (insn);
19283 if (GET_CODE (set) == PARALLEL)
19284 set = XVECEXP (set, 0, 0);
19285 gcc_assert (GET_CODE (set) == SET);
19286 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19288 set_mode = GET_MODE (SET_DEST (set));
19289 switch (set_mode)
19291 case CCNOmode:
19292 if (req_mode != CCNOmode
19293 && (req_mode != CCmode
19294 || XEXP (SET_SRC (set), 1) != const0_rtx))
19295 return false;
19296 break;
19297 case CCmode:
19298 if (req_mode == CCGCmode)
19299 return false;
19300 /* FALLTHRU */
19301 case CCGCmode:
19302 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19303 return false;
19304 /* FALLTHRU */
19305 case CCGOCmode:
19306 if (req_mode == CCZmode)
19307 return false;
19308 /* FALLTHRU */
19309 case CCZmode:
19310 break;
19312 case CCAmode:
19313 case CCCmode:
19314 case CCOmode:
19315 case CCSmode:
19316 if (set_mode != req_mode)
19317 return false;
19318 break;
19320 default:
19321 gcc_unreachable ();
19324 return GET_MODE (SET_SRC (set)) == set_mode;
19327 /* Generate insn patterns to do an integer compare of OPERANDS. */
19329 static rtx
19330 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19332 enum machine_mode cmpmode;
19333 rtx tmp, flags;
19335 cmpmode = SELECT_CC_MODE (code, op0, op1);
19336 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19338 /* This is very simple, but making the interface the same as in the
19339 FP case makes the rest of the code easier. */
19340 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19341 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19343 /* Return the test that should be put into the flags user, i.e.
19344 the bcc, scc, or cmov instruction. */
19345 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19348 /* Figure out whether to use ordered or unordered fp comparisons.
19349 Return the appropriate mode to use. */
19351 enum machine_mode
19352 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19354 /* ??? In order to make all comparisons reversible, we do all comparisons
19355 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19356 all forms trapping and nontrapping comparisons, we can make inequality
19357 comparisons trapping again, since it results in better code when using
19358 FCOM based compares. */
19359 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19362 enum machine_mode
19363 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19365 enum machine_mode mode = GET_MODE (op0);
19367 if (SCALAR_FLOAT_MODE_P (mode))
19369 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19370 return ix86_fp_compare_mode (code);
19373 switch (code)
19375 /* Only zero flag is needed. */
19376 case EQ: /* ZF=0 */
19377 case NE: /* ZF!=0 */
19378 return CCZmode;
19379 /* Codes needing carry flag. */
19380 case GEU: /* CF=0 */
19381 case LTU: /* CF=1 */
19382 /* Detect overflow checks. They need just the carry flag. */
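/* E.g. the unsigned overflow test "(a + b) < a" is true exactly when the
   addition sets the carry flag, so CCCmode is sufficient. */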
19383 if (GET_CODE (op0) == PLUS
19384 && rtx_equal_p (op1, XEXP (op0, 0)))
19385 return CCCmode;
19386 else
19387 return CCmode;
19388 case GTU: /* CF=0 & ZF=0 */
19389 case LEU: /* CF=1 | ZF=1 */
19390 return CCmode;
19391 /* Codes possibly doable only with sign flag when
19392 comparing against zero. */
19393 case GE: /* SF=OF or SF=0 */
19394 case LT: /* SF<>OF or SF=1 */
19395 if (op1 == const0_rtx)
19396 return CCGOCmode;
19397 else
19398 /* For other cases the carry flag is not required. */
19399 return CCGCmode;
19400 /* Codes doable only with the sign flag when comparing
19401 against zero, but we lack a jump instruction for that,
19402 so we need to use relational tests against overflow,
19403 which thus needs to be zero. */
19404 case GT: /* ZF=0 & SF=OF */
19405 case LE: /* ZF=1 | SF<>OF */
19406 if (op1 == const0_rtx)
19407 return CCNOmode;
19408 else
19409 return CCGCmode;
19410 /* The strcmp pattern does (use flags), and combine may ask us for the
19411 proper mode. */
19412 case USE:
19413 return CCmode;
19414 default:
19415 gcc_unreachable ();
19419 /* Return the fixed registers used for condition codes. */
19421 static bool
19422 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19424 *p1 = FLAGS_REG;
19425 *p2 = FPSR_REG;
19426 return true;
19429 /* If two condition code modes are compatible, return a condition code
19430 mode which is compatible with both. Otherwise, return
19431 VOIDmode. */
19433 static enum machine_mode
19434 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19436 if (m1 == m2)
19437 return m1;
19439 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19440 return VOIDmode;
19442 if ((m1 == CCGCmode && m2 == CCGOCmode)
19443 || (m1 == CCGOCmode && m2 == CCGCmode))
19444 return CCGCmode;
19446 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19447 return m2;
19448 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19449 return m1;
19451 switch (m1)
19453 default:
19454 gcc_unreachable ();
19456 case CCmode:
19457 case CCGCmode:
19458 case CCGOCmode:
19459 case CCNOmode:
19460 case CCAmode:
19461 case CCCmode:
19462 case CCOmode:
19463 case CCSmode:
19464 case CCZmode:
19465 switch (m2)
19467 default:
19468 return VOIDmode;
19470 case CCmode:
19471 case CCGCmode:
19472 case CCGOCmode:
19473 case CCNOmode:
19474 case CCAmode:
19475 case CCCmode:
19476 case CCOmode:
19477 case CCSmode:
19478 case CCZmode:
19479 return CCmode;
19482 case CCFPmode:
19483 case CCFPUmode:
19484 /* These are only compatible with themselves, which we already
19485 checked above. */
19486 return VOIDmode;
19491 /* Return a comparison we can do that is equivalent to
19492 swap_condition (code), apart possibly from orderedness.
19493 But never change orderedness if TARGET_IEEE_FP, returning
19494 UNKNOWN in that case if necessary. */
19496 static enum rtx_code
19497 ix86_fp_swap_condition (enum rtx_code code)
19499 switch (code)
19501 case GT: /* GTU - CF=0 & ZF=0 */
19502 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19503 case GE: /* GEU - CF=0 */
19504 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19505 case UNLT: /* LTU - CF=1 */
19506 return TARGET_IEEE_FP ? UNKNOWN : GT;
19507 case UNLE: /* LEU - CF=1 | ZF=1 */
19508 return TARGET_IEEE_FP ? UNKNOWN : GE;
19509 default:
19510 return swap_condition (code);
19514 /* Return the cost of comparison CODE using the best strategy for performance.
19515 All following functions use the number of instructions as the cost metric.
19516 In the future this should be tweaked to compute bytes for optimize_size and
19517 take into account the performance of various instructions on various CPUs. */
19519 static int
19520 ix86_fp_comparison_cost (enum rtx_code code)
19522 int arith_cost;
19524 /* The cost of code using bit-twiddling on %ah. */
19525 switch (code)
19527 case UNLE:
19528 case UNLT:
19529 case LTGT:
19530 case GT:
19531 case GE:
19532 case UNORDERED:
19533 case ORDERED:
19534 case UNEQ:
19535 arith_cost = 4;
19536 break;
19537 case LT:
19538 case NE:
19539 case EQ:
19540 case UNGE:
19541 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19542 break;
19543 case LE:
19544 case UNGT:
19545 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19546 break;
19547 default:
19548 gcc_unreachable ();
19551 switch (ix86_fp_comparison_strategy (code))
19553 case IX86_FPCMP_COMI:
19554 return arith_cost > 4 ? 3 : 2;
19555 case IX86_FPCMP_SAHF:
19556 return arith_cost > 4 ? 4 : 3;
19557 default:
19558 return arith_cost;
19562 /* Return the strategy to use for floating-point comparisons. We assume that
19563 fcomi is always preferable where available, since that is also true when looking
19564 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19566 enum ix86_fpcmp_strategy
19567 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19569 /* Do fcomi/sahf based test when profitable. */
19571 if (TARGET_CMOVE)
19572 return IX86_FPCMP_COMI;
19574 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19575 return IX86_FPCMP_SAHF;
19577 return IX86_FPCMP_ARITH;
19580 /* Swap, force into registers, or otherwise massage the two operands
19581 to a fp comparison. The operands are updated in place; the new
19582 comparison code is returned. */
19584 static enum rtx_code
19585 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19587 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19588 rtx op0 = *pop0, op1 = *pop1;
19589 enum machine_mode op_mode = GET_MODE (op0);
19590 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19592 /* All of the unordered compare instructions only work on registers.
19593 The same is true of the fcomi compare instructions. The XFmode
19594 compare instructions require registers except when comparing
19595 against zero or when converting operand 1 from fixed point to
19596 floating point. */
19598 if (!is_sse
19599 && (fpcmp_mode == CCFPUmode
19600 || (op_mode == XFmode
19601 && ! (standard_80387_constant_p (op0) == 1
19602 || standard_80387_constant_p (op1) == 1)
19603 && GET_CODE (op1) != FLOAT)
19604 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19606 op0 = force_reg (op_mode, op0);
19607 op1 = force_reg (op_mode, op1);
19609 else
19611 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19612 things around if they appear profitable, otherwise force op0
19613 into a register. */
19615 if (standard_80387_constant_p (op0) == 0
19616 || (MEM_P (op0)
19617 && ! (standard_80387_constant_p (op1) == 0
19618 || MEM_P (op1))))
19620 enum rtx_code new_code = ix86_fp_swap_condition (code);
19621 if (new_code != UNKNOWN)
19623 rtx tmp;
19624 tmp = op0, op0 = op1, op1 = tmp;
19625 code = new_code;
19629 if (!REG_P (op0))
19630 op0 = force_reg (op_mode, op0);
19632 if (CONSTANT_P (op1))
19634 int tmp = standard_80387_constant_p (op1);
19635 if (tmp == 0)
19636 op1 = validize_mem (force_const_mem (op_mode, op1));
19637 else if (tmp == 1)
19639 if (TARGET_CMOVE)
19640 op1 = force_reg (op_mode, op1);
19642 else
19643 op1 = force_reg (op_mode, op1);
19647 /* Try to rearrange the comparison to make it cheaper. */
19648 if (ix86_fp_comparison_cost (code)
19649 > ix86_fp_comparison_cost (swap_condition (code))
19650 && (REG_P (op1) || can_create_pseudo_p ()))
19652 rtx tmp;
19653 tmp = op0, op0 = op1, op1 = tmp;
19654 code = swap_condition (code);
19655 if (!REG_P (op0))
19656 op0 = force_reg (op_mode, op0);
19659 *pop0 = op0;
19660 *pop1 = op1;
19661 return code;
19664 /* Convert comparison codes we use to represent FP comparison to integer
19665 code that will result in proper branch. Return UNKNOWN if no such code
19666 is available. */
19668 enum rtx_code
19669 ix86_fp_compare_code_to_integer (enum rtx_code code)
19671 switch (code)
19673 case GT:
19674 return GTU;
19675 case GE:
19676 return GEU;
19677 case ORDERED:
19678 case UNORDERED:
19679 return code;
19680 break;
19681 case UNEQ:
19682 return EQ;
19683 break;
19684 case UNLT:
19685 return LTU;
19686 break;
19687 case UNLE:
19688 return LEU;
19689 break;
19690 case LTGT:
19691 return NE;
19692 break;
19693 default:
19694 return UNKNOWN;
19698 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19700 static rtx
19701 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19703 enum machine_mode fpcmp_mode, intcmp_mode;
19704 rtx tmp, tmp2;
19706 fpcmp_mode = ix86_fp_compare_mode (code);
19707 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19709 /* Do fcomi/sahf based test when profitable. */
19710 switch (ix86_fp_comparison_strategy (code))
19712 case IX86_FPCMP_COMI:
19713 intcmp_mode = fpcmp_mode;
19714 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19715 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19716 tmp);
19717 emit_insn (tmp);
19718 break;
19720 case IX86_FPCMP_SAHF:
19721 intcmp_mode = fpcmp_mode;
19722 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19723 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19724 tmp);
19726 if (!scratch)
19727 scratch = gen_reg_rtx (HImode);
19728 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19729 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19730 break;
19732 case IX86_FPCMP_ARITH:
19733 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19734 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19735 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19736 if (!scratch)
19737 scratch = gen_reg_rtx (HImode);
19738 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19740 /* In the unordered case, we have to check C2 for NaN's, which
19741 doesn't happen to work out to anything nice combination-wise.
19742 So do some bit twiddling on the value we've got in AH to come
19743 up with an appropriate set of condition codes. */
19745 intcmp_mode = CCNOmode;
19746 switch (code)
19748 case GT:
19749 case UNGT:
19750 if (code == GT || !TARGET_IEEE_FP)
19752 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19753 code = EQ;
19755 else
19757 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19758 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19759 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19760 intcmp_mode = CCmode;
19761 code = GEU;
19763 break;
19764 case LT:
19765 case UNLT:
19766 if (code == LT && TARGET_IEEE_FP)
19768 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19769 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19770 intcmp_mode = CCmode;
19771 code = EQ;
19773 else
19775 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19776 code = NE;
19778 break;
19779 case GE:
19780 case UNGE:
19781 if (code == GE || !TARGET_IEEE_FP)
19783 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19784 code = EQ;
19786 else
19788 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19789 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19790 code = NE;
19792 break;
19793 case LE:
19794 case UNLE:
19795 if (code == LE && TARGET_IEEE_FP)
19797 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19798 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19799 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19800 intcmp_mode = CCmode;
19801 code = LTU;
19803 else
19805 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19806 code = NE;
19808 break;
19809 case EQ:
19810 case UNEQ:
19811 if (code == EQ && TARGET_IEEE_FP)
19813 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19814 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19815 intcmp_mode = CCmode;
19816 code = EQ;
19818 else
19820 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19821 code = NE;
19823 break;
19824 case NE:
19825 case LTGT:
19826 if (code == NE && TARGET_IEEE_FP)
19828 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19829 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19830 GEN_INT (0x40)));
19831 code = NE;
19833 else
19835 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19836 code = EQ;
19838 break;
19840 case UNORDERED:
19841 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19842 code = NE;
19843 break;
19844 case ORDERED:
19845 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19846 code = EQ;
19847 break;
19849 default:
19850 gcc_unreachable ();
19852 break;
19854 default:
19855 gcc_unreachable();
19858 /* Return the test that should be put into the flags user, i.e.
19859 the bcc, scc, or cmov instruction. */
19860 return gen_rtx_fmt_ee (code, VOIDmode,
19861 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19862 const0_rtx);
19865 static rtx
19866 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19868 rtx ret;
19870 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19871 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19873 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19875 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19876 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19878 else
19879 ret = ix86_expand_int_compare (code, op0, op1);
19881 return ret;
19884 void
19885 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19887 enum machine_mode mode = GET_MODE (op0);
19888 rtx tmp;
19890 switch (mode)
19892 case SFmode:
19893 case DFmode:
19894 case XFmode:
19895 case QImode:
19896 case HImode:
19897 case SImode:
19898 simple:
19899 tmp = ix86_expand_compare (code, op0, op1);
19900 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19901 gen_rtx_LABEL_REF (VOIDmode, label),
19902 pc_rtx);
19903 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19904 return;
19906 case DImode:
19907 if (TARGET_64BIT)
19908 goto simple;
19909 case TImode:
19910 /* Expand DImode branch into multiple compare+branch. */
19912 rtx lo[2], hi[2], label2;
19913 enum rtx_code code1, code2, code3;
19914 enum machine_mode submode;
19916 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19918 tmp = op0, op0 = op1, op1 = tmp;
19919 code = swap_condition (code);
19922 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19923 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19925 submode = mode == DImode ? SImode : DImode;
19927 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19928 avoid two branches. This costs one extra insn, so disable when
19929 optimizing for size. */
19931 if ((code == EQ || code == NE)
19932 && (!optimize_insn_for_size_p ()
19933 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19935 rtx xor0, xor1;
19937 xor1 = hi[0];
19938 if (hi[1] != const0_rtx)
19939 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19940 NULL_RTX, 0, OPTAB_WIDEN);
19942 xor0 = lo[0];
19943 if (lo[1] != const0_rtx)
19944 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19945 NULL_RTX, 0, OPTAB_WIDEN);
19947 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19948 NULL_RTX, 0, OPTAB_WIDEN);
19950 ix86_expand_branch (code, tmp, const0_rtx, label);
19951 return;
19954 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19955 op1 is a constant and the low word is zero, then we can just
19956 examine the high word. Similarly for low word -1 and
19957 less-or-equal-than or greater-than. */
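/* E.g. when splitting a DImode compare on a 32-bit target, x < 0x500000000
   with a zero low word in the constant reduces to comparing only the high
   words (hi(x) < 5), since the unsigned low-word comparison against zero
   can never be true. */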
19959 if (CONST_INT_P (hi[1]))
19960 switch (code)
19962 case LT: case LTU: case GE: case GEU:
19963 if (lo[1] == const0_rtx)
19965 ix86_expand_branch (code, hi[0], hi[1], label);
19966 return;
19968 break;
19969 case LE: case LEU: case GT: case GTU:
19970 if (lo[1] == constm1_rtx)
19972 ix86_expand_branch (code, hi[0], hi[1], label);
19973 return;
19975 break;
19976 default:
19977 break;
19980 /* Otherwise, we need two or three jumps. */
19982 label2 = gen_label_rtx ();
19984 code1 = code;
19985 code2 = swap_condition (code);
19986 code3 = unsigned_condition (code);
19988 switch (code)
19990 case LT: case GT: case LTU: case GTU:
19991 break;
19993 case LE: code1 = LT; code2 = GT; break;
19994 case GE: code1 = GT; code2 = LT; break;
19995 case LEU: code1 = LTU; code2 = GTU; break;
19996 case GEU: code1 = GTU; code2 = LTU; break;
19998 case EQ: code1 = UNKNOWN; code2 = NE; break;
19999 case NE: code2 = UNKNOWN; break;
20001 default:
20002 gcc_unreachable ();
20006 * a < b =>
20007 * if (hi(a) < hi(b)) goto true;
20008 * if (hi(a) > hi(b)) goto false;
20009 * if (lo(a) < lo(b)) goto true;
20010 * false:
20013 if (code1 != UNKNOWN)
20014 ix86_expand_branch (code1, hi[0], hi[1], label);
20015 if (code2 != UNKNOWN)
20016 ix86_expand_branch (code2, hi[0], hi[1], label2);
20018 ix86_expand_branch (code3, lo[0], lo[1], label);
20020 if (code2 != UNKNOWN)
20021 emit_label (label2);
20022 return;
20025 default:
20026 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20027 goto simple;
20031 /* Split branch based on floating point condition. */
20032 void
20033 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20034 rtx target1, rtx target2, rtx tmp)
20036 rtx condition;
20037 rtx i;
20039 if (target2 != pc_rtx)
20041 rtx tmp = target2;
20042 code = reverse_condition_maybe_unordered (code);
20043 target2 = target1;
20044 target1 = tmp;
20047 condition = ix86_expand_fp_compare (code, op1, op2,
20048 tmp);
20050 i = emit_jump_insn (gen_rtx_SET
20051 (VOIDmode, pc_rtx,
20052 gen_rtx_IF_THEN_ELSE (VOIDmode,
20053 condition, target1, target2)));
20054 if (split_branch_probability >= 0)
20055 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20058 void
20059 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20061 rtx ret;
20063 gcc_assert (GET_MODE (dest) == QImode);
20065 ret = ix86_expand_compare (code, op0, op1);
20066 PUT_MODE (ret, QImode);
20067 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20070 /* Expand comparison setting or clearing carry flag. Return true when
20071 successful and set pop for the operation. */
20072 static bool
20073 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20075 enum machine_mode mode =
20076 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20078 /* Do not handle double-mode compares that go through special path. */
20079 if (mode == (TARGET_64BIT ? TImode : DImode))
20080 return false;
20082 if (SCALAR_FLOAT_MODE_P (mode))
20084 rtx compare_op, compare_seq;
20086 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20088 /* Shortcut: the following common codes never translate
20089 into carry-flag compares. */
20090 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20091 || code == ORDERED || code == UNORDERED)
20092 return false;
20094 /* These comparisons require zero flag; swap operands so they won't. */
20095 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20096 && !TARGET_IEEE_FP)
20098 rtx tmp = op0;
20099 op0 = op1;
20100 op1 = tmp;
20101 code = swap_condition (code);
20104 /* Try to expand the comparison and verify that we end up with
20105 carry flag based comparison. This fails to be true only when
20106 we decide to expand comparison using arithmetic that is not
20107 too common scenario. */
20108 start_sequence ();
20109 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20110 compare_seq = get_insns ();
20111 end_sequence ();
20113 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20114 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20115 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20116 else
20117 code = GET_CODE (compare_op);
20119 if (code != LTU && code != GEU)
20120 return false;
20122 emit_insn (compare_seq);
20123 *pop = compare_op;
20124 return true;
20127 if (!INTEGRAL_MODE_P (mode))
20128 return false;
20130 switch (code)
20132 case LTU:
20133 case GEU:
20134 break;
20136 /* Convert a==0 into (unsigned)a<1. */
20137 case EQ:
20138 case NE:
20139 if (op1 != const0_rtx)
20140 return false;
20141 op1 = const1_rtx;
20142 code = (code == EQ ? LTU : GEU);
20143 break;
20145 /* Convert a>b into b<a or a>=b-1. */
20146 case GTU:
20147 case LEU:
20148 if (CONST_INT_P (op1))
20150 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20151 /* Bail out on overflow. We could still swap the operands, but that
20152 would force loading the constant into a register. */
20153 if (op1 == const0_rtx
20154 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20155 return false;
20156 code = (code == GTU ? GEU : LTU);
20158 else
20160 rtx tmp = op1;
20161 op1 = op0;
20162 op0 = tmp;
20163 code = (code == GTU ? LTU : GEU);
20165 break;
20167 /* Convert a>=0 into (unsigned)a<0x80000000. */
20168 case LT:
20169 case GE:
20170 if (mode == DImode || op1 != const0_rtx)
20171 return false;
20172 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20173 code = (code == LT ? GEU : LTU);
20174 break;
20175 case LE:
20176 case GT:
20177 if (mode == DImode || op1 != constm1_rtx)
20178 return false;
20179 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20180 code = (code == LE ? GEU : LTU);
20181 break;
20183 default:
20184 return false;
20186 /* Swapping operands may cause constant to appear as first operand. */
20187 if (!nonimmediate_operand (op0, VOIDmode))
20189 if (!can_create_pseudo_p ())
20190 return false;
20191 op0 = force_reg (mode, op0);
20193 *pop = ix86_expand_compare (code, op0, op1);
20194 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20195 return true;
20198 bool
20199 ix86_expand_int_movcc (rtx operands[])
20201 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20202 rtx compare_seq, compare_op;
20203 enum machine_mode mode = GET_MODE (operands[0]);
20204 bool sign_bit_compare_p = false;
20205 rtx op0 = XEXP (operands[1], 0);
20206 rtx op1 = XEXP (operands[1], 1);
20208 if (GET_MODE (op0) == TImode
20209 || (GET_MODE (op0) == DImode
20210 && !TARGET_64BIT))
20211 return false;
20213 start_sequence ();
20214 compare_op = ix86_expand_compare (code, op0, op1);
20215 compare_seq = get_insns ();
20216 end_sequence ();
20218 compare_code = GET_CODE (compare_op);
20220 if ((op1 == const0_rtx && (code == GE || code == LT))
20221 || (op1 == constm1_rtx && (code == GT || code == LE)))
20222 sign_bit_compare_p = true;
20224 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20225 HImode insns, we'd be swallowed in word prefix ops. */
20227 if ((mode != HImode || TARGET_FAST_PREFIX)
20228 && (mode != (TARGET_64BIT ? TImode : DImode))
20229 && CONST_INT_P (operands[2])
20230 && CONST_INT_P (operands[3]))
20232 rtx out = operands[0];
20233 HOST_WIDE_INT ct = INTVAL (operands[2]);
20234 HOST_WIDE_INT cf = INTVAL (operands[3]);
20235 HOST_WIDE_INT diff;
20237 diff = ct - cf;
20238 /* Sign bit compares are better done using shifts than by using
20239 sbb. */
20240 if (sign_bit_compare_p
20241 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20243 /* Detect overlap between destination and compare sources. */
20244 rtx tmp = out;
20246 if (!sign_bit_compare_p)
20248 rtx flags;
20249 bool fpcmp = false;
20251 compare_code = GET_CODE (compare_op);
20253 flags = XEXP (compare_op, 0);
20255 if (GET_MODE (flags) == CCFPmode
20256 || GET_MODE (flags) == CCFPUmode)
20258 fpcmp = true;
20259 compare_code
20260 = ix86_fp_compare_code_to_integer (compare_code);
20263 /* To simplify rest of code, restrict to the GEU case. */
20264 if (compare_code == LTU)
20266 HOST_WIDE_INT tmp = ct;
20267 ct = cf;
20268 cf = tmp;
20269 compare_code = reverse_condition (compare_code);
20270 code = reverse_condition (code);
20272 else
20274 if (fpcmp)
20275 PUT_CODE (compare_op,
20276 reverse_condition_maybe_unordered
20277 (GET_CODE (compare_op)));
20278 else
20279 PUT_CODE (compare_op,
20280 reverse_condition (GET_CODE (compare_op)));
20282 diff = ct - cf;
20284 if (reg_overlap_mentioned_p (out, op0)
20285 || reg_overlap_mentioned_p (out, op1))
20286 tmp = gen_reg_rtx (mode);
20288 if (mode == DImode)
20289 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20290 else
20291 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20292 flags, compare_op));
20294 else
20296 if (code == GT || code == GE)
20297 code = reverse_condition (code);
20298 else
20300 HOST_WIDE_INT tmp = ct;
20301 ct = cf;
20302 cf = tmp;
20303 diff = ct - cf;
20305 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20308 if (diff == 1)
20311 * cmpl op0,op1
20312 * sbbl dest,dest
20313 * [addl dest, ct]
20315 * Size 5 - 8.
20317 if (ct)
20318 tmp = expand_simple_binop (mode, PLUS,
20319 tmp, GEN_INT (ct),
20320 copy_rtx (tmp), 1, OPTAB_DIRECT);
20322 else if (cf == -1)
20325 * cmpl op0,op1
20326 * sbbl dest,dest
20327 * orl $ct, dest
20329 * Size 8.
20331 tmp = expand_simple_binop (mode, IOR,
20332 tmp, GEN_INT (ct),
20333 copy_rtx (tmp), 1, OPTAB_DIRECT);
20335 else if (diff == -1 && ct)
20338 * cmpl op0,op1
20339 * sbbl dest,dest
20340 * notl dest
20341 * [addl dest, cf]
20343 * Size 8 - 11.
20345 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20346 if (cf)
20347 tmp = expand_simple_binop (mode, PLUS,
20348 copy_rtx (tmp), GEN_INT (cf),
20349 copy_rtx (tmp), 1, OPTAB_DIRECT);
20351 else
20354 * cmpl op0,op1
20355 * sbbl dest,dest
20356 * [notl dest]
20357 * andl cf - ct, dest
20358 * [addl dest, ct]
20360 * Size 8 - 11.
20363 if (cf == 0)
20365 cf = ct;
20366 ct = 0;
20367 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20370 tmp = expand_simple_binop (mode, AND,
20371 copy_rtx (tmp),
20372 gen_int_mode (cf - ct, mode),
20373 copy_rtx (tmp), 1, OPTAB_DIRECT);
20374 if (ct)
20375 tmp = expand_simple_binop (mode, PLUS,
20376 copy_rtx (tmp), GEN_INT (ct),
20377 copy_rtx (tmp), 1, OPTAB_DIRECT);
20380 if (!rtx_equal_p (tmp, out))
20381 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20383 return true;
20386 if (diff < 0)
20388 enum machine_mode cmp_mode = GET_MODE (op0);
20390 HOST_WIDE_INT tmp;
20391 tmp = ct, ct = cf, cf = tmp;
20392 diff = -diff;
20394 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20396 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20398 /* We may be reversing an unordered compare to a normal compare, which
20399 is not valid in general (we may convert a non-trapping condition
20400 to a trapping one); however, on i386 we currently emit all
20401 comparisons unordered. */
20402 compare_code = reverse_condition_maybe_unordered (compare_code);
20403 code = reverse_condition_maybe_unordered (code);
20405 else
20407 compare_code = reverse_condition (compare_code);
20408 code = reverse_condition (code);
20412 compare_code = UNKNOWN;
20413 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20414 && CONST_INT_P (op1))
20416 if (op1 == const0_rtx
20417 && (code == LT || code == GE))
20418 compare_code = code;
20419 else if (op1 == constm1_rtx)
20421 if (code == LE)
20422 compare_code = LT;
20423 else if (code == GT)
20424 compare_code = GE;
20428 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20429 if (compare_code != UNKNOWN
20430 && GET_MODE (op0) == GET_MODE (out)
20431 && (cf == -1 || ct == -1))
20433 /* If lea code below could be used, only optimize
20434 if it results in a 2 insn sequence. */
20436 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20437 || diff == 3 || diff == 5 || diff == 9)
20438 || (compare_code == LT && ct == -1)
20439 || (compare_code == GE && cf == -1))
20442 * notl op1 (if necessary)
20443 * sarl $31, op1
20444 * orl cf, op1
20446 if (ct != -1)
20448 cf = ct;
20449 ct = -1;
20450 code = reverse_condition (code);
20453 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20455 out = expand_simple_binop (mode, IOR,
20456 out, GEN_INT (cf),
20457 out, 1, OPTAB_DIRECT);
20458 if (out != operands[0])
20459 emit_move_insn (operands[0], out);
20461 return true;
20466 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20467 || diff == 3 || diff == 5 || diff == 9)
20468 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20469 && (mode != DImode
20470 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20473 * xorl dest,dest
20474 * cmpl op1,op2
20475 * setcc dest
20476 * lea cf(dest*(ct-cf)),dest
20478 * Size 14.
20480 * This also catches the degenerate setcc-only case.
20483 rtx tmp;
20484 int nops;
20486 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20488 nops = 0;
20489 /* On x86_64 the lea instruction operates on Pmode, so we need
20490 to get the arithmetic done in the proper mode to match. */
20491 if (diff == 1)
20492 tmp = copy_rtx (out);
20493 else
20495 rtx out1;
20496 out1 = copy_rtx (out);
20497 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20498 nops++;
20499 if (diff & 1)
20501 tmp = gen_rtx_PLUS (mode, tmp, out1);
20502 nops++;
20505 if (cf != 0)
20507 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20508 nops++;
20510 if (!rtx_equal_p (tmp, out))
20512 if (nops == 1)
20513 out = force_operand (tmp, copy_rtx (out));
20514 else
20515 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20517 if (!rtx_equal_p (out, operands[0]))
20518 emit_move_insn (operands[0], copy_rtx (out));
20520 return true;
20524 * General case: Jumpful:
20525 * xorl dest,dest cmpl op1, op2
20526 * cmpl op1, op2 movl ct, dest
20527 * setcc dest jcc 1f
20528 * decl dest movl cf, dest
20529 * andl (cf-ct),dest 1:
20530 * addl ct,dest
20532 * Size 20. Size 14.
20534 * This is reasonably steep, but branch mispredict costs are
20535 * high on modern cpus, so consider failing only if optimizing
20536 * for space.
20539 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20540 && BRANCH_COST (optimize_insn_for_speed_p (),
20541 false) >= 2)
20543 if (cf == 0)
20545 enum machine_mode cmp_mode = GET_MODE (op0);
20547 cf = ct;
20548 ct = 0;
20550 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20552 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20554 /* We may be reversing an unordered compare to a normal compare,
20555 which is not valid in general (we may convert a non-trapping
20556 condition to a trapping one); however, on i386 we currently
20557 emit all comparisons unordered. */
20558 code = reverse_condition_maybe_unordered (code);
20560 else
20562 code = reverse_condition (code);
20563 if (compare_code != UNKNOWN)
20564 compare_code = reverse_condition (compare_code);
20568 if (compare_code != UNKNOWN)
20570 /* notl op1 (if needed)
20571 sarl $31, op1
20572 andl (cf-ct), op1
20573 addl ct, op1
20575 For x < 0 (resp. x <= -1) there will be no notl,
20576 so if possible swap the constants to get rid of the
20577 complement.
20578 True/false will be -1/0 while code below (store flag
20579 followed by decrement) is 0/-1, so the constants need
20580 to be exchanged once more. */
20582 if (compare_code == GE || !cf)
20584 code = reverse_condition (code);
20585 compare_code = LT;
20587 else
20589 HOST_WIDE_INT tmp = cf;
20590 cf = ct;
20591 ct = tmp;
20594 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20596 else
20598 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20600 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20601 constm1_rtx,
20602 copy_rtx (out), 1, OPTAB_DIRECT);
20605 out = expand_simple_binop (mode, AND, copy_rtx (out),
20606 gen_int_mode (cf - ct, mode),
20607 copy_rtx (out), 1, OPTAB_DIRECT);
20608 if (ct)
20609 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20610 copy_rtx (out), 1, OPTAB_DIRECT);
20611 if (!rtx_equal_p (out, operands[0]))
20612 emit_move_insn (operands[0], copy_rtx (out));
20614 return true;
20618 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20620 /* Try a few things more with specific constants and a variable. */
20622 optab op;
20623 rtx var, orig_out, out, tmp;
20625 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20626 return false;
20628 /* If one of the two operands is an interesting constant, load a
20629 constant with the above and mask it in with a logical operation. */
20631 if (CONST_INT_P (operands[2]))
20633 var = operands[3];
20634 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20635 operands[3] = constm1_rtx, op = and_optab;
20636 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20637 operands[3] = const0_rtx, op = ior_optab;
20638 else
20639 return false;
20641 else if (CONST_INT_P (operands[3]))
20643 var = operands[2];
20644 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20645 operands[2] = constm1_rtx, op = and_optab;
20646 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20647 operands[2] = const0_rtx, op = ior_optab;
20648 else
20649 return false;
20651 else
20652 return false;
20654 orig_out = operands[0];
20655 tmp = gen_reg_rtx (mode);
20656 operands[0] = tmp;
20658 /* Recurse to get the constant loaded. */
20659 if (ix86_expand_int_movcc (operands) == 0)
20660 return false;
20662 /* Mask in the interesting variable. */
20663 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20664 OPTAB_WIDEN);
20665 if (!rtx_equal_p (out, orig_out))
20666 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20668 return true;
20672 * For comparison with above,
20674 * movl cf,dest
20675 * movl ct,tmp
20676 * cmpl op1,op2
20677 * cmovcc tmp,dest
20679 * Size 15.
20682 if (! nonimmediate_operand (operands[2], mode))
20683 operands[2] = force_reg (mode, operands[2]);
20684 if (! nonimmediate_operand (operands[3], mode))
20685 operands[3] = force_reg (mode, operands[3]);
20687 if (! register_operand (operands[2], VOIDmode)
20688 && (mode == QImode
20689 || ! register_operand (operands[3], VOIDmode)))
20690 operands[2] = force_reg (mode, operands[2]);
20692 if (mode == QImode
20693 && ! register_operand (operands[3], VOIDmode))
20694 operands[3] = force_reg (mode, operands[3]);
20696 emit_insn (compare_seq);
20697 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20698 gen_rtx_IF_THEN_ELSE (mode,
20699 compare_op, operands[2],
20700 operands[3])));
20701 return true;
20704 /* Swap, force into registers, or otherwise massage the two operands
20705 to an sse comparison with a mask result. Thus we differ a bit from
20706 ix86_prepare_fp_compare_args which expects to produce a flags result.
20708 The DEST operand exists to help determine whether to commute commutative
20709 operators. The POP0/POP1 operands are updated in place. The new
20710 comparison code is returned, or UNKNOWN if not implementable. */
20712 static enum rtx_code
20713 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20714 rtx *pop0, rtx *pop1)
20716 rtx tmp;
20718 switch (code)
20720 case LTGT:
20721 case UNEQ:
20722 /* AVX supports all the needed comparisons. */
20723 if (TARGET_AVX)
20724 break;
20725 /* We have no LTGT as an operator. We could implement it with
20726 NE & ORDERED, but this requires an extra temporary. It's
20727 not clear that it's worth it. */
20728 return UNKNOWN;
20730 case LT:
20731 case LE:
20732 case UNGT:
20733 case UNGE:
20734 /* These are supported directly. */
20735 break;
20737 case EQ:
20738 case NE:
20739 case UNORDERED:
20740 case ORDERED:
20741 /* AVX has 3 operand comparisons, no need to swap anything. */
20742 if (TARGET_AVX)
20743 break;
20744 /* For commutative operators, try to canonicalize the destination
20745 operand to be first in the comparison - this helps reload to
20746 avoid extra moves. */
20747 if (!dest || !rtx_equal_p (dest, *pop1))
20748 break;
20749 /* FALLTHRU */
20751 case GE:
20752 case GT:
20753 case UNLE:
20754 case UNLT:
20755 /* These are not supported directly before AVX, and furthermore
20756 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20757 comparison operands to transform into something that is
20758 supported. */
20759 tmp = *pop0;
20760 *pop0 = *pop1;
20761 *pop1 = tmp;
20762 code = swap_condition (code);
20763 break;
20765 default:
20766 gcc_unreachable ();
20769 return code;
20772 /* Detect conditional moves that exactly match min/max operational
20773 semantics. Note that this is IEEE safe, as long as we don't
20774 interchange the operands.
20776 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20777 and TRUE if the operation is successful and instructions are emitted. */
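/* E.g. "a < b ? a : b" maps directly onto minss/minsd semantics: when the
   operands are unordered, or when both are zeros, the second operand B is
   returned, exactly as the C conditional expression requires. */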
20779 static bool
20780 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20781 rtx cmp_op1, rtx if_true, rtx if_false)
20783 enum machine_mode mode;
20784 bool is_min;
20785 rtx tmp;
20787 if (code == LT)
20789 else if (code == UNGE)
20791 tmp = if_true;
20792 if_true = if_false;
20793 if_false = tmp;
20795 else
20796 return false;
20798 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20799 is_min = true;
20800 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20801 is_min = false;
20802 else
20803 return false;
20805 mode = GET_MODE (dest);
20807 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20808 but MODE may be a vector mode and thus not appropriate. */
20809 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20811 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20812 rtvec v;
20814 if_true = force_reg (mode, if_true);
20815 v = gen_rtvec (2, if_true, if_false);
20816 tmp = gen_rtx_UNSPEC (mode, v, u);
20818 else
20820 code = is_min ? SMIN : SMAX;
20821 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20824 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20825 return true;
20828 /* Expand an sse vector comparison. Return the register with the result. */
20830 static rtx
20831 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20832 rtx op_true, rtx op_false)
20834 enum machine_mode mode = GET_MODE (dest);
20835 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20837 /* In the general case the result of the comparison can differ from the operands' type. */
20838 enum machine_mode cmp_mode;
20840 /* In AVX512F the result of comparison is an integer mask. */
20841 bool maskcmp = false;
20842 rtx x;
20844 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20846 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20847 gcc_assert (cmp_mode != BLKmode);
20849 maskcmp = true;
20851 else
20852 cmp_mode = cmp_ops_mode;
20855 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20856 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20857 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20859 if (optimize
20860 || reg_overlap_mentioned_p (dest, op_true)
20861 || reg_overlap_mentioned_p (dest, op_false))
20862 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20864 /* Compare patterns for int modes are unspec in AVX512F only. */
20865 if (maskcmp && (code == GT || code == EQ))
20867 rtx (*gen)(rtx, rtx, rtx);
20869 switch (cmp_ops_mode)
20871 case V16SImode:
20872 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20873 break;
20874 case V8DImode:
20875 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20876 break;
20877 default:
20878 gen = NULL;
20881 if (gen)
20883 emit_insn (gen (dest, cmp_op0, cmp_op1));
20884 return dest;
20887 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20889 if (cmp_mode != mode && !maskcmp)
20891 x = force_reg (cmp_ops_mode, x);
20892 convert_move (dest, x, false);
20894 else
20895 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20897 return dest;
20900 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20901 operations. This is used for both scalar and vector conditional moves. */
20903 static void
20904 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20906 enum machine_mode mode = GET_MODE (dest);
20907 enum machine_mode cmpmode = GET_MODE (cmp);
20909 /* In AVX512F the result of comparison is an integer mask. */
20910 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20912 rtx t2, t3, x;
20914 if (vector_all_ones_operand (op_true, mode)
20915 && rtx_equal_p (op_false, CONST0_RTX (mode))
20916 && !maskcmp)
20918 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20920 else if (op_false == CONST0_RTX (mode)
20921 && !maskcmp)
20923 op_true = force_reg (mode, op_true);
20924 x = gen_rtx_AND (mode, cmp, op_true);
20925 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20927 else if (op_true == CONST0_RTX (mode)
20928 && !maskcmp)
20930 op_false = force_reg (mode, op_false);
20931 x = gen_rtx_NOT (mode, cmp);
20932 x = gen_rtx_AND (mode, x, op_false);
20933 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20935 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20936 && !maskcmp)
20938 op_false = force_reg (mode, op_false);
20939 x = gen_rtx_IOR (mode, cmp, op_false);
20940 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20942 else if (TARGET_XOP
20943 && !maskcmp)
20945 op_true = force_reg (mode, op_true);
20947 if (!nonimmediate_operand (op_false, mode))
20948 op_false = force_reg (mode, op_false);
20950 emit_insn (gen_rtx_SET (mode, dest,
20951 gen_rtx_IF_THEN_ELSE (mode, cmp,
20952 op_true,
20953 op_false)));
20955 else
20957 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20958 rtx d = dest;
20960 if (!nonimmediate_operand (op_true, mode))
20961 op_true = force_reg (mode, op_true);
20963 op_false = force_reg (mode, op_false);
20965 switch (mode)
20967 case V4SFmode:
20968 if (TARGET_SSE4_1)
20969 gen = gen_sse4_1_blendvps;
20970 break;
20971 case V2DFmode:
20972 if (TARGET_SSE4_1)
20973 gen = gen_sse4_1_blendvpd;
20974 break;
20975 case V16QImode:
20976 case V8HImode:
20977 case V4SImode:
20978 case V2DImode:
20979 if (TARGET_SSE4_1)
20981 gen = gen_sse4_1_pblendvb;
20982 if (mode != V16QImode)
20983 d = gen_reg_rtx (V16QImode);
20984 op_false = gen_lowpart (V16QImode, op_false);
20985 op_true = gen_lowpart (V16QImode, op_true);
20986 cmp = gen_lowpart (V16QImode, cmp);
20988 break;
20989 case V8SFmode:
20990 if (TARGET_AVX)
20991 gen = gen_avx_blendvps256;
20992 break;
20993 case V4DFmode:
20994 if (TARGET_AVX)
20995 gen = gen_avx_blendvpd256;
20996 break;
20997 case V32QImode:
20998 case V16HImode:
20999 case V8SImode:
21000 case V4DImode:
21001 if (TARGET_AVX2)
21003 gen = gen_avx2_pblendvb;
21004 if (mode != V32QImode)
21005 d = gen_reg_rtx (V32QImode);
21006 op_false = gen_lowpart (V32QImode, op_false);
21007 op_true = gen_lowpart (V32QImode, op_true);
21008 cmp = gen_lowpart (V32QImode, cmp);
21010 break;
21012 case V16SImode:
21013 gen = gen_avx512f_blendmv16si;
21014 break;
21015 case V8DImode:
21016 gen = gen_avx512f_blendmv8di;
21017 break;
21018 case V8DFmode:
21019 gen = gen_avx512f_blendmv8df;
21020 break;
21021 case V16SFmode:
21022 gen = gen_avx512f_blendmv16sf;
21023 break;
21025 default:
21026 break;
21029 if (gen != NULL)
21031 emit_insn (gen (d, op_false, op_true, cmp));
21032 if (d != dest)
21033 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21035 else
21037 op_true = force_reg (mode, op_true);
21039 t2 = gen_reg_rtx (mode);
21040 if (optimize)
21041 t3 = gen_reg_rtx (mode);
21042 else
21043 t3 = dest;
21045 x = gen_rtx_AND (mode, op_true, cmp);
21046 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21048 x = gen_rtx_NOT (mode, cmp);
21049 x = gen_rtx_AND (mode, x, op_false);
21050 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21052 x = gen_rtx_IOR (mode, t3, t2);
21053 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21058 /* Expand a floating-point conditional move. Return true if successful. */
21060 bool
21061 ix86_expand_fp_movcc (rtx operands[])
21063 enum machine_mode mode = GET_MODE (operands[0]);
21064 enum rtx_code code = GET_CODE (operands[1]);
21065 rtx tmp, compare_op;
21066 rtx op0 = XEXP (operands[1], 0);
21067 rtx op1 = XEXP (operands[1], 1);
21069 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21071 enum machine_mode cmode;
21073 /* Since we've no cmove for sse registers, don't force bad register
21074 allocation just to gain access to it. Deny movcc when the
21075 comparison mode doesn't match the move mode. */
21076 cmode = GET_MODE (op0);
21077 if (cmode == VOIDmode)
21078 cmode = GET_MODE (op1);
21079 if (cmode != mode)
21080 return false;
21082 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21083 if (code == UNKNOWN)
21084 return false;
21086 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21087 operands[2], operands[3]))
21088 return true;
21090 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21091 operands[2], operands[3]);
21092 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21093 return true;
21096 if (GET_MODE (op0) == TImode
21097 || (GET_MODE (op0) == DImode
21098 && !TARGET_64BIT))
21099 return false;
21101 /* The floating point conditional move instructions don't directly
21102 support conditions resulting from a signed integer comparison. */
21104 compare_op = ix86_expand_compare (code, op0, op1);
21105 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21107 tmp = gen_reg_rtx (QImode);
21108 ix86_expand_setcc (tmp, code, op0, op1);
21110 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21113 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21114 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21115 operands[2], operands[3])));
21117 return true;
21120 /* Expand a floating-point vector conditional move; a vcond operation
21121 rather than a movcc operation. */
21123 bool
21124 ix86_expand_fp_vcond (rtx operands[])
21126 enum rtx_code code = GET_CODE (operands[3]);
21127 rtx cmp;
21129 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21130 &operands[4], &operands[5]);
21131 if (code == UNKNOWN)
21133 rtx temp;
21134 switch (GET_CODE (operands[3]))
21136 case LTGT:
21137 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21138 operands[5], operands[0], operands[0]);
21139 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21140 operands[5], operands[1], operands[2]);
21141 code = AND;
21142 break;
21143 case UNEQ:
21144 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21145 operands[5], operands[0], operands[0]);
21146 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21147 operands[5], operands[1], operands[2]);
21148 code = IOR;
21149 break;
21150 default:
21151 gcc_unreachable ();
21153 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21154 OPTAB_DIRECT);
21155 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21156 return true;
21159 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21160 operands[5], operands[1], operands[2]))
21161 return true;
21163 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21164 operands[1], operands[2]);
21165 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21166 return true;
21169 /* Expand a signed/unsigned integral vector conditional move. */
21171 bool
21172 ix86_expand_int_vcond (rtx operands[])
21174 enum machine_mode data_mode = GET_MODE (operands[0]);
21175 enum machine_mode mode = GET_MODE (operands[4]);
21176 enum rtx_code code = GET_CODE (operands[3]);
21177 bool negate = false;
21178 rtx x, cop0, cop1;
21180 cop0 = operands[4];
21181 cop1 = operands[5];
21183 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21184 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
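  /* For instance, with 32-bit elements and x == 0x80000005 (negative), an
     arithmetic shift right by 31 gives 0xffffffff (-1) while a logical
     shift right by 31 gives 1; for a non-negative x both shifts give 0.  */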
21185 if ((code == LT || code == GE)
21186 && data_mode == mode
21187 && cop1 == CONST0_RTX (mode)
21188 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21189 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21190 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21191 && (GET_MODE_SIZE (data_mode) == 16
21192 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21194 rtx negop = operands[2 - (code == LT)];
21195 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21196 if (negop == CONST1_RTX (data_mode))
21198 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21199 operands[0], 1, OPTAB_DIRECT);
21200 if (res != operands[0])
21201 emit_move_insn (operands[0], res);
21202 return true;
21204 else if (GET_MODE_INNER (data_mode) != DImode
21205 && vector_all_ones_operand (negop, data_mode))
21207 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21208 operands[0], 0, OPTAB_DIRECT);
21209 if (res != operands[0])
21210 emit_move_insn (operands[0], res);
21211 return true;
21215 if (!nonimmediate_operand (cop1, mode))
21216 cop1 = force_reg (mode, cop1);
21217 if (!general_operand (operands[1], data_mode))
21218 operands[1] = force_reg (data_mode, operands[1]);
21219 if (!general_operand (operands[2], data_mode))
21220 operands[2] = force_reg (data_mode, operands[2]);
21222 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21223 if (TARGET_XOP
21224 && (mode == V16QImode || mode == V8HImode
21225 || mode == V4SImode || mode == V2DImode))
21227 else
21229 /* Canonicalize the comparison to EQ, GT, GTU. */
21230 switch (code)
21232 case EQ:
21233 case GT:
21234 case GTU:
21235 break;
21237 case NE:
21238 case LE:
21239 case LEU:
21240 code = reverse_condition (code);
21241 negate = true;
21242 break;
21244 case GE:
21245 case GEU:
21246 code = reverse_condition (code);
21247 negate = true;
21248 /* FALLTHRU */
21250 case LT:
21251 case LTU:
21252 code = swap_condition (code);
21253 x = cop0, cop0 = cop1, cop1 = x;
21254 break;
21256 default:
21257 gcc_unreachable ();
21260 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21261 if (mode == V2DImode)
21263 switch (code)
21265 case EQ:
21266 /* SSE4.1 supports EQ. */
21267 if (!TARGET_SSE4_1)
21268 return false;
21269 break;
21271 case GT:
21272 case GTU:
21273 /* SSE4.2 supports GT/GTU. */
21274 if (!TARGET_SSE4_2)
21275 return false;
21276 break;
21278 default:
21279 gcc_unreachable ();
21283 /* Unsigned parallel compare is not supported by the hardware.
21284 Play some tricks to turn this into a signed comparison
21285 against 0. */
21286 if (code == GTU)
21288 cop0 = force_reg (mode, cop0);
21290 switch (mode)
21292 case V16SImode:
21293 case V8DImode:
21294 case V8SImode:
21295 case V4DImode:
21296 case V4SImode:
21297 case V2DImode:
21299 rtx t1, t2, mask;
21300 rtx (*gen_sub3) (rtx, rtx, rtx);
21302 switch (mode)
21304 case V16SImode: gen_sub3 = gen_subv16si3; break;
21305 case V8DImode: gen_sub3 = gen_subv8di3; break;
21306 case V8SImode: gen_sub3 = gen_subv8si3; break;
21307 case V4DImode: gen_sub3 = gen_subv4di3; break;
21308 case V4SImode: gen_sub3 = gen_subv4si3; break;
21309 case V2DImode: gen_sub3 = gen_subv2di3; break;
21310 default:
21311 gcc_unreachable ();
21313 /* Subtract (-(INT MAX) - 1) from both operands to make
21314 them signed. */
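	    /* For instance, with 32-bit elements, 1 GTU 0xffffffff is false;
	       after subtracting 0x80000000 from both operands the signed
	       compare 0x80000001 GT 0x7fffffff is likewise false, so the
	       unsigned result is preserved.  */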
21315 mask = ix86_build_signbit_mask (mode, true, false);
21316 t1 = gen_reg_rtx (mode);
21317 emit_insn (gen_sub3 (t1, cop0, mask));
21319 t2 = gen_reg_rtx (mode);
21320 emit_insn (gen_sub3 (t2, cop1, mask));
21322 cop0 = t1;
21323 cop1 = t2;
21324 code = GT;
21326 break;
21328 case V32QImode:
21329 case V16HImode:
21330 case V16QImode:
21331 case V8HImode:
21332 /* Perform a parallel unsigned saturating subtraction. */
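	  /* a GTU b holds exactly when the saturating difference a -us b is
	     nonzero (e.g. for byte elements 5 -us 3 == 2 while 3 -us 5 == 0),
	     so the EQ-against-zero test below yields the inverted result,
	     which is why NEGATE is flipped.  */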
21333 x = gen_reg_rtx (mode);
21334 emit_insn (gen_rtx_SET (VOIDmode, x,
21335 gen_rtx_US_MINUS (mode, cop0, cop1)));
21337 cop0 = x;
21338 cop1 = CONST0_RTX (mode);
21339 code = EQ;
21340 negate = !negate;
21341 break;
21343 default:
21344 gcc_unreachable ();
21349 /* Allow the comparison to be done in one mode, but the movcc to
21350 happen in another mode. */
21351 if (data_mode == mode)
21353 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21354 operands[1+negate], operands[2-negate]);
21356 else
21358 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21359 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21360 operands[1+negate], operands[2-negate]);
21361 if (GET_MODE (x) == mode)
21362 x = gen_lowpart (data_mode, x);
21365 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21366 operands[2-negate]);
21367 return true;
21370 static bool
21371 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21373 enum machine_mode mode = GET_MODE (op0);
21374 switch (mode)
21376 case V16SImode:
21377 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21378 force_reg (V16SImode, mask),
21379 op1));
21380 return true;
21381 case V16SFmode:
21382 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21383 force_reg (V16SImode, mask),
21384 op1));
21385 return true;
21386 case V8DImode:
21387 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21388 force_reg (V8DImode, mask), op1));
21389 return true;
21390 case V8DFmode:
21391 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21392 force_reg (V8DImode, mask), op1));
21393 return true;
21394 default:
21395 return false;
21399 /* Expand a variable vector permutation. */
21401 void
21402 ix86_expand_vec_perm (rtx operands[])
21404 rtx target = operands[0];
21405 rtx op0 = operands[1];
21406 rtx op1 = operands[2];
21407 rtx mask = operands[3];
21408 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21409 enum machine_mode mode = GET_MODE (op0);
21410 enum machine_mode maskmode = GET_MODE (mask);
21411 int w, e, i;
21412 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21414 /* Number of elements in the vector. */
21415 w = GET_MODE_NUNITS (mode);
21416 e = GET_MODE_UNIT_SIZE (mode);
21417 gcc_assert (w <= 64);
21419 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21420 return;
21422 if (TARGET_AVX2)
21424 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21426 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21427 a constant shuffle operand. With a tiny bit of effort we can
21428 use VPERMD instead. A re-interpretation stall for V4DFmode is
21429 unfortunate but there's no avoiding it.
21430 Similarly, for V16HImode we don't have instructions for variable
21431 shuffling, while for V32QImode we can, after preparing suitable
21432 masks, use vpshufb; vpshufb; vpermq; vpor. */
21434 if (mode == V16HImode)
21436 maskmode = mode = V32QImode;
21437 w = 32;
21438 e = 1;
21440 else
21442 maskmode = mode = V8SImode;
21443 w = 8;
21444 e = 4;
21446 t1 = gen_reg_rtx (maskmode);
21448 /* Replicate the low bits of the V4DImode mask into V8SImode:
21449 mask = { A B C D }
21450 t1 = { A A B B C C D D }. */
21451 for (i = 0; i < w / 2; ++i)
21452 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21453 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21454 vt = force_reg (maskmode, vt);
21455 mask = gen_lowpart (maskmode, mask);
21456 if (maskmode == V8SImode)
21457 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21458 else
21459 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21461 /* Multiply the shuffle indices by two. */
21462 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21463 OPTAB_DIRECT);
21465 /* Add one to the odd shuffle indices:
21466 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21467 for (i = 0; i < w / 2; ++i)
21469 vec[i * 2] = const0_rtx;
21470 vec[i * 2 + 1] = const1_rtx;
21472 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21473 vt = validize_mem (force_const_mem (maskmode, vt));
21474 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21475 OPTAB_DIRECT);
21477 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21478 operands[3] = mask = t1;
21479 target = gen_reg_rtx (mode);
21480 op0 = gen_lowpart (mode, op0);
21481 op1 = gen_lowpart (mode, op1);
21484 switch (mode)
21486 case V8SImode:
21487 /* The VPERMD and VPERMPS instructions already properly ignore
21488 the high bits of the shuffle elements. No need for us to
21489 perform an AND ourselves. */
21490 if (one_operand_shuffle)
21492 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21493 if (target != operands[0])
21494 emit_move_insn (operands[0],
21495 gen_lowpart (GET_MODE (operands[0]), target));
21497 else
21499 t1 = gen_reg_rtx (V8SImode);
21500 t2 = gen_reg_rtx (V8SImode);
21501 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21502 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21503 goto merge_two;
21505 return;
21507 case V8SFmode:
21508 mask = gen_lowpart (V8SImode, mask);
21509 if (one_operand_shuffle)
21510 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21511 else
21513 t1 = gen_reg_rtx (V8SFmode);
21514 t2 = gen_reg_rtx (V8SFmode);
21515 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21516 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21517 goto merge_two;
21519 return;
21521 case V4SImode:
21522 /* By combining the two 128-bit input vectors into one 256-bit
21523 input vector, we can use VPERMD and VPERMPS for the full
21524 two-operand shuffle. */
21525 t1 = gen_reg_rtx (V8SImode);
21526 t2 = gen_reg_rtx (V8SImode);
21527 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21528 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21529 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21530 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21531 return;
21533 case V4SFmode:
21534 t1 = gen_reg_rtx (V8SFmode);
21535 t2 = gen_reg_rtx (V8SImode);
21536 mask = gen_lowpart (V4SImode, mask);
21537 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21538 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21539 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21540 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21541 return;
21543 case V32QImode:
21544 t1 = gen_reg_rtx (V32QImode);
21545 t2 = gen_reg_rtx (V32QImode);
21546 t3 = gen_reg_rtx (V32QImode);
21547 vt2 = GEN_INT (-128);
21548 for (i = 0; i < 32; i++)
21549 vec[i] = vt2;
21550 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21551 vt = force_reg (V32QImode, vt);
21552 for (i = 0; i < 32; i++)
21553 vec[i] = i < 16 ? vt2 : const0_rtx;
21554 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21555 vt2 = force_reg (V32QImode, vt2);
21556 /* From mask create two adjusted masks, which contain the same
21557 bits as mask in the low 7 bits of each vector element.
21558 The first mask will have the most significant bit clear
21559 if it requests element from the same 128-bit lane
21560 and MSB set if it requests element from the other 128-bit lane.
21561 The second mask will have the opposite values of the MSB,
21562 and additionally will have its 128-bit lanes swapped.
21563 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21564 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21565 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21566 stands for other 12 bytes. */
21567 /* Whether an element comes from the same lane or the other lane is
21568 indicated by bit 4 of the mask, so shift it up by 3 to the MSB position. */
21569 t5 = gen_reg_rtx (V4DImode);
21570 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21571 GEN_INT (3)));
21572 /* Clear MSB bits from the mask just in case it had them set. */
21573 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21574 /* After this t1 will have MSB set for elements from other lane. */
21575 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21576 /* Clear bits other than MSB. */
21577 emit_insn (gen_andv32qi3 (t1, t1, vt));
21578 /* Or in the lower bits from mask into t3. */
21579 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21580 /* And invert MSB bits in t1, so MSB is set for elements from the same
21581 lane. */
21582 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21583 /* Swap 128-bit lanes in t3. */
21584 t6 = gen_reg_rtx (V4DImode);
21585 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21586 const2_rtx, GEN_INT (3),
21587 const0_rtx, const1_rtx));
21588 /* And or in the lower bits from mask into t1. */
21589 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21590 if (one_operand_shuffle)
21592 /* Each of these shuffles will put 0s in places where
21593 element from the other 128-bit lane is needed, otherwise
21594 will shuffle in the requested value. */
21595 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21596 gen_lowpart (V32QImode, t6)));
21597 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21598 /* For t3 the 128-bit lanes are swapped again. */
21599 t7 = gen_reg_rtx (V4DImode);
21600 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21601 const2_rtx, GEN_INT (3),
21602 const0_rtx, const1_rtx));
21603 /* And oring both together leads to the result. */
21604 emit_insn (gen_iorv32qi3 (target, t1,
21605 gen_lowpart (V32QImode, t7)));
21606 if (target != operands[0])
21607 emit_move_insn (operands[0],
21608 gen_lowpart (GET_MODE (operands[0]), target));
21609 return;
21612 t4 = gen_reg_rtx (V32QImode);
21613 /* Similar to the one_operand_shuffle code above,
21614 just repeated twice, once for each operand. The merge_two:
21615 code below will merge the two results together. */
21616 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21617 gen_lowpart (V32QImode, t6)));
21618 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21619 gen_lowpart (V32QImode, t6)));
21620 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21621 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21622 t7 = gen_reg_rtx (V4DImode);
21623 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21624 const2_rtx, GEN_INT (3),
21625 const0_rtx, const1_rtx));
21626 t8 = gen_reg_rtx (V4DImode);
21627 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21628 const2_rtx, GEN_INT (3),
21629 const0_rtx, const1_rtx));
21630 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21631 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21632 t1 = t4;
21633 t2 = t3;
21634 goto merge_two;
21636 default:
21637 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21638 break;
21642 if (TARGET_XOP)
21644 /* The XOP VPPERM insn supports three inputs. By ignoring the
21645 one_operand_shuffle special case, we avoid creating another
21646 set of constant vectors in memory. */
21647 one_operand_shuffle = false;
21649 /* mask = mask & {2*w-1, ...} */
21650 vt = GEN_INT (2*w - 1);
21652 else
21654 /* mask = mask & {w-1, ...} */
21655 vt = GEN_INT (w - 1);
21658 for (i = 0; i < w; i++)
21659 vec[i] = vt;
21660 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21661 mask = expand_simple_binop (maskmode, AND, mask, vt,
21662 NULL_RTX, 0, OPTAB_DIRECT);
21664 /* For non-QImode operations, convert the word permutation control
21665 into a byte permutation control. */
21666 if (mode != V16QImode)
21668 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21669 GEN_INT (exact_log2 (e)),
21670 NULL_RTX, 0, OPTAB_DIRECT);
21672 /* Convert mask to vector of chars. */
21673 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21675 /* Replicate each of the input bytes into byte positions:
21676 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21677 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21678 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21679 for (i = 0; i < 16; ++i)
21680 vec[i] = GEN_INT (i/e * e);
21681 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21682 vt = validize_mem (force_const_mem (V16QImode, vt));
21683 if (TARGET_XOP)
21684 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21685 else
21686 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21688 /* Convert it into the byte positions by doing
21689 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21690 for (i = 0; i < 16; ++i)
21691 vec[i] = GEN_INT (i % e);
21692 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21693 vt = validize_mem (force_const_mem (V16QImode, vt));
21694 emit_insn (gen_addv16qi3 (mask, mask, vt));
21697 /* The actual shuffle operations all operate on V16QImode. */
21698 op0 = gen_lowpart (V16QImode, op0);
21699 op1 = gen_lowpart (V16QImode, op1);
21701 if (TARGET_XOP)
21703 if (GET_MODE (target) != V16QImode)
21704 target = gen_reg_rtx (V16QImode);
21705 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21706 if (target != operands[0])
21707 emit_move_insn (operands[0],
21708 gen_lowpart (GET_MODE (operands[0]), target));
21710 else if (one_operand_shuffle)
21712 if (GET_MODE (target) != V16QImode)
21713 target = gen_reg_rtx (V16QImode);
21714 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21715 if (target != operands[0])
21716 emit_move_insn (operands[0],
21717 gen_lowpart (GET_MODE (operands[0]), target));
21719 else
21721 rtx xops[6];
21722 bool ok;
21724 /* Shuffle the two input vectors independently. */
21725 t1 = gen_reg_rtx (V16QImode);
21726 t2 = gen_reg_rtx (V16QImode);
21727 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21728 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21730 merge_two:
21731 /* Then merge them together. The key is whether any given control
21732 element contained a bit set that indicates the second word. */
21733 mask = operands[3];
21734 vt = GEN_INT (w);
21735 if (maskmode == V2DImode && !TARGET_SSE4_1)
21737 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21738 more shuffle to convert the V2DI input mask into a V4SI
21739 input mask. At that point the masking done by expand_int_vcond
21740 will work as desired. */
21741 rtx t3 = gen_reg_rtx (V4SImode);
21742 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21743 const0_rtx, const0_rtx,
21744 const2_rtx, const2_rtx));
21745 mask = t3;
21746 maskmode = V4SImode;
21747 e = w = 4;
21750 for (i = 0; i < w; i++)
21751 vec[i] = vt;
21752 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21753 vt = force_reg (maskmode, vt);
21754 mask = expand_simple_binop (maskmode, AND, mask, vt,
21755 NULL_RTX, 0, OPTAB_DIRECT);
21757 if (GET_MODE (target) != mode)
21758 target = gen_reg_rtx (mode);
21759 xops[0] = target;
21760 xops[1] = gen_lowpart (mode, t2);
21761 xops[2] = gen_lowpart (mode, t1);
21762 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21763 xops[4] = mask;
21764 xops[5] = vt;
21765 ok = ix86_expand_int_vcond (xops);
21766 gcc_assert (ok);
21767 if (target != operands[0])
21768 emit_move_insn (operands[0],
21769 gen_lowpart (GET_MODE (operands[0]), target));
21773 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
21774 true if we should do zero extension, else sign extension. HIGH_P is
21775 true if we want the N/2 high elements, else the low elements. */
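/* For example, with SRC in V8HImode and HIGH_P set, DEST receives the four
   high HImode elements of SRC widened to SImode (zero- or sign-extended
   according to UNSIGNED_P), i.e. a V4SImode value.  */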
21777 void
21778 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21780 enum machine_mode imode = GET_MODE (src);
21781 rtx tmp;
21783 if (TARGET_SSE4_1)
21785 rtx (*unpack)(rtx, rtx);
21786 rtx (*extract)(rtx, rtx) = NULL;
21787 enum machine_mode halfmode = BLKmode;
21789 switch (imode)
21791 case V32QImode:
21792 if (unsigned_p)
21793 unpack = gen_avx2_zero_extendv16qiv16hi2;
21794 else
21795 unpack = gen_avx2_sign_extendv16qiv16hi2;
21796 halfmode = V16QImode;
21797 extract
21798 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21799 break;
21800 case V32HImode:
21801 if (unsigned_p)
21802 unpack = gen_avx512f_zero_extendv16hiv16si2;
21803 else
21804 unpack = gen_avx512f_sign_extendv16hiv16si2;
21805 halfmode = V16HImode;
21806 extract
21807 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21808 break;
21809 case V16HImode:
21810 if (unsigned_p)
21811 unpack = gen_avx2_zero_extendv8hiv8si2;
21812 else
21813 unpack = gen_avx2_sign_extendv8hiv8si2;
21814 halfmode = V8HImode;
21815 extract
21816 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21817 break;
21818 case V16SImode:
21819 if (unsigned_p)
21820 unpack = gen_avx512f_zero_extendv8siv8di2;
21821 else
21822 unpack = gen_avx512f_sign_extendv8siv8di2;
21823 halfmode = V8SImode;
21824 extract
21825 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21826 break;
21827 case V8SImode:
21828 if (unsigned_p)
21829 unpack = gen_avx2_zero_extendv4siv4di2;
21830 else
21831 unpack = gen_avx2_sign_extendv4siv4di2;
21832 halfmode = V4SImode;
21833 extract
21834 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21835 break;
21836 case V16QImode:
21837 if (unsigned_p)
21838 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21839 else
21840 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21841 break;
21842 case V8HImode:
21843 if (unsigned_p)
21844 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21845 else
21846 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21847 break;
21848 case V4SImode:
21849 if (unsigned_p)
21850 unpack = gen_sse4_1_zero_extendv2siv2di2;
21851 else
21852 unpack = gen_sse4_1_sign_extendv2siv2di2;
21853 break;
21854 default:
21855 gcc_unreachable ();
21858 if (GET_MODE_SIZE (imode) >= 32)
21860 tmp = gen_reg_rtx (halfmode);
21861 emit_insn (extract (tmp, src));
21863 else if (high_p)
21865 /* Shift higher 8 bytes to lower 8 bytes. */
21866 tmp = gen_reg_rtx (V1TImode);
21867 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21868 GEN_INT (64)));
21869 tmp = gen_lowpart (imode, tmp);
21871 else
21872 tmp = src;
21874 emit_insn (unpack (dest, tmp));
21876 else
21878 rtx (*unpack)(rtx, rtx, rtx);
21880 switch (imode)
21882 case V16QImode:
21883 if (high_p)
21884 unpack = gen_vec_interleave_highv16qi;
21885 else
21886 unpack = gen_vec_interleave_lowv16qi;
21887 break;
21888 case V8HImode:
21889 if (high_p)
21890 unpack = gen_vec_interleave_highv8hi;
21891 else
21892 unpack = gen_vec_interleave_lowv8hi;
21893 break;
21894 case V4SImode:
21895 if (high_p)
21896 unpack = gen_vec_interleave_highv4si;
21897 else
21898 unpack = gen_vec_interleave_lowv4si;
21899 break;
21900 default:
21901 gcc_unreachable ();
21904 if (unsigned_p)
21905 tmp = force_reg (imode, CONST0_RTX (imode));
21906 else
21907 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21908 src, pc_rtx, pc_rtx);
21910 rtx tmp2 = gen_reg_rtx (imode);
21911 emit_insn (unpack (tmp2, src, tmp));
21912 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21916 /* Expand conditional increment or decrement using adc/sbb instructions.
21917 The default case using setcc followed by the conditional move can be
21918 done by generic code. */
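/* For illustration (AT&T-syntax sketch with arbitrary registers): an unsigned
   'x += (a < b)' can be emitted as 'cmpl %esi, %edi' followed by
   'adcl $0, %eax', since the compare leaves the borrow in the carry flag.  */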
21919 bool
21920 ix86_expand_int_addcc (rtx operands[])
21922 enum rtx_code code = GET_CODE (operands[1]);
21923 rtx flags;
21924 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21925 rtx compare_op;
21926 rtx val = const0_rtx;
21927 bool fpcmp = false;
21928 enum machine_mode mode;
21929 rtx op0 = XEXP (operands[1], 0);
21930 rtx op1 = XEXP (operands[1], 1);
21932 if (operands[3] != const1_rtx
21933 && operands[3] != constm1_rtx)
21934 return false;
21935 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21936 return false;
21937 code = GET_CODE (compare_op);
21939 flags = XEXP (compare_op, 0);
21941 if (GET_MODE (flags) == CCFPmode
21942 || GET_MODE (flags) == CCFPUmode)
21944 fpcmp = true;
21945 code = ix86_fp_compare_code_to_integer (code);
21948 if (code != LTU)
21950 val = constm1_rtx;
21951 if (fpcmp)
21952 PUT_CODE (compare_op,
21953 reverse_condition_maybe_unordered
21954 (GET_CODE (compare_op)));
21955 else
21956 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21959 mode = GET_MODE (operands[0]);
21961 /* Construct either adc or sbb insn. */
21962 if ((code == LTU) == (operands[3] == constm1_rtx))
21964 switch (mode)
21966 case QImode:
21967 insn = gen_subqi3_carry;
21968 break;
21969 case HImode:
21970 insn = gen_subhi3_carry;
21971 break;
21972 case SImode:
21973 insn = gen_subsi3_carry;
21974 break;
21975 case DImode:
21976 insn = gen_subdi3_carry;
21977 break;
21978 default:
21979 gcc_unreachable ();
21982 else
21984 switch (mode)
21986 case QImode:
21987 insn = gen_addqi3_carry;
21988 break;
21989 case HImode:
21990 insn = gen_addhi3_carry;
21991 break;
21992 case SImode:
21993 insn = gen_addsi3_carry;
21994 break;
21995 case DImode:
21996 insn = gen_adddi3_carry;
21997 break;
21998 default:
21999 gcc_unreachable ();
22002 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22004 return true;
22008 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22009 but works for floating point parameters and nonoffsettable memories.
22010 For pushes, it returns just stack offsets; the values will be saved
22011 in the right order. Maximally four parts are generated. */
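/* For example, on a 32-bit target a DFmode operand splits into two SImode
   parts, an XFmode operand into three, and a TFmode operand into four.  */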
22013 static int
22014 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22016 int size;
22018 if (!TARGET_64BIT)
22019 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22020 else
22021 size = (GET_MODE_SIZE (mode) + 4) / 8;
22023 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22024 gcc_assert (size >= 2 && size <= 4);
22026 /* Optimize constant pool reference to immediates. This is used by fp
22027 moves, that force all constants to memory to allow combining. */
22028 if (MEM_P (operand) && MEM_READONLY_P (operand))
22030 rtx tmp = maybe_get_pool_constant (operand);
22031 if (tmp)
22032 operand = tmp;
22035 if (MEM_P (operand) && !offsettable_memref_p (operand))
22037 /* The only non-offsettable memories we handle are pushes. */
22038 int ok = push_operand (operand, VOIDmode);
22040 gcc_assert (ok);
22042 operand = copy_rtx (operand);
22043 PUT_MODE (operand, word_mode);
22044 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22045 return size;
22048 if (GET_CODE (operand) == CONST_VECTOR)
22050 enum machine_mode imode = int_mode_for_mode (mode);
22051 /* Caution: if we looked through a constant pool memory above,
22052 the operand may actually have a different mode now. That's
22053 ok, since we want to pun this all the way back to an integer. */
22054 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22055 gcc_assert (operand != NULL);
22056 mode = imode;
22059 if (!TARGET_64BIT)
22061 if (mode == DImode)
22062 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22063 else
22065 int i;
22067 if (REG_P (operand))
22069 gcc_assert (reload_completed);
22070 for (i = 0; i < size; i++)
22071 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22073 else if (offsettable_memref_p (operand))
22075 operand = adjust_address (operand, SImode, 0);
22076 parts[0] = operand;
22077 for (i = 1; i < size; i++)
22078 parts[i] = adjust_address (operand, SImode, 4 * i);
22080 else if (GET_CODE (operand) == CONST_DOUBLE)
22082 REAL_VALUE_TYPE r;
22083 long l[4];
22085 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22086 switch (mode)
22088 case TFmode:
22089 real_to_target (l, &r, mode);
22090 parts[3] = gen_int_mode (l[3], SImode);
22091 parts[2] = gen_int_mode (l[2], SImode);
22092 break;
22093 case XFmode:
22094 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22095 long double may not be 80-bit. */
22096 real_to_target (l, &r, mode);
22097 parts[2] = gen_int_mode (l[2], SImode);
22098 break;
22099 case DFmode:
22100 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22101 break;
22102 default:
22103 gcc_unreachable ();
22105 parts[1] = gen_int_mode (l[1], SImode);
22106 parts[0] = gen_int_mode (l[0], SImode);
22108 else
22109 gcc_unreachable ();
22112 else
22114 if (mode == TImode)
22115 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22116 if (mode == XFmode || mode == TFmode)
22118 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22119 if (REG_P (operand))
22121 gcc_assert (reload_completed);
22122 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22123 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22125 else if (offsettable_memref_p (operand))
22127 operand = adjust_address (operand, DImode, 0);
22128 parts[0] = operand;
22129 parts[1] = adjust_address (operand, upper_mode, 8);
22131 else if (GET_CODE (operand) == CONST_DOUBLE)
22133 REAL_VALUE_TYPE r;
22134 long l[4];
22136 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22137 real_to_target (l, &r, mode);
22139 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22140 if (HOST_BITS_PER_WIDE_INT >= 64)
22141 parts[0]
22142 = gen_int_mode
22143 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22144 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22145 DImode);
22146 else
22147 parts[0] = immed_double_const (l[0], l[1], DImode);
22149 if (upper_mode == SImode)
22150 parts[1] = gen_int_mode (l[2], SImode);
22151 else if (HOST_BITS_PER_WIDE_INT >= 64)
22152 parts[1]
22153 = gen_int_mode
22154 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22155 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22156 DImode);
22157 else
22158 parts[1] = immed_double_const (l[2], l[3], DImode);
22160 else
22161 gcc_unreachable ();
22165 return size;
22168 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22169 Return false when normal moves are needed; true when all required
22170 insns have been emitted. Operands 2-4 contain the input values
22171 in the correct order; operands 5-7 contain the output values. */
22173 void
22174 ix86_split_long_move (rtx operands[])
22176 rtx part[2][4];
22177 int nparts, i, j;
22178 int push = 0;
22179 int collisions = 0;
22180 enum machine_mode mode = GET_MODE (operands[0]);
22181 bool collisionparts[4];
22183 /* The DFmode expanders may ask us to move a double.
22184 For a 64-bit target this is a single move. By hiding the fact
22185 here we simplify i386.md splitters. */
22186 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22188 /* Optimize constant pool reference to immediates. This is used by
22189 fp moves, that force all constants to memory to allow combining. */
22191 if (MEM_P (operands[1])
22192 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22193 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22194 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22195 if (push_operand (operands[0], VOIDmode))
22197 operands[0] = copy_rtx (operands[0]);
22198 PUT_MODE (operands[0], word_mode);
22200 else
22201 operands[0] = gen_lowpart (DImode, operands[0]);
22202 operands[1] = gen_lowpart (DImode, operands[1]);
22203 emit_move_insn (operands[0], operands[1]);
22204 return;
22207 /* The only non-offsettable memory we handle is push. */
22208 if (push_operand (operands[0], VOIDmode))
22209 push = 1;
22210 else
22211 gcc_assert (!MEM_P (operands[0])
22212 || offsettable_memref_p (operands[0]));
22214 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22215 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22217 /* When emitting push, take care for source operands on the stack. */
22218 if (push && MEM_P (operands[1])
22219 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22221 rtx src_base = XEXP (part[1][nparts - 1], 0);
22223 /* Compensate for the stack decrement by 4. */
22224 if (!TARGET_64BIT && nparts == 3
22225 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22226 src_base = plus_constant (Pmode, src_base, 4);
22228 /* src_base refers to the stack pointer and is
22229 automatically decreased by emitted push. */
22230 for (i = 0; i < nparts; i++)
22231 part[1][i] = change_address (part[1][i],
22232 GET_MODE (part[1][i]), src_base);
22235 /* We need to do copy in the right order in case an address register
22236 of the source overlaps the destination. */
22237 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22239 rtx tmp;
22241 for (i = 0; i < nparts; i++)
22243 collisionparts[i]
22244 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22245 if (collisionparts[i])
22246 collisions++;
22249 /* Collision in the middle part can be handled by reordering. */
22250 if (collisions == 1 && nparts == 3 && collisionparts [1])
22252 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22253 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22255 else if (collisions == 1
22256 && nparts == 4
22257 && (collisionparts [1] || collisionparts [2]))
22259 if (collisionparts [1])
22261 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22262 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22264 else
22266 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22267 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22271 /* If there are more collisions, we can't handle it by reordering.
22272 Do an lea to the last part and use only one colliding move. */
22273 else if (collisions > 1)
22275 rtx base, addr, tls_base = NULL_RTX;
22277 collisions = 1;
22279 base = part[0][nparts - 1];
22281 /* Handle the case when the last part isn't valid for lea.
22282 Happens in 64-bit mode storing the 12-byte XFmode. */
22283 if (GET_MODE (base) != Pmode)
22284 base = gen_rtx_REG (Pmode, REGNO (base));
22286 addr = XEXP (part[1][0], 0);
22287 if (TARGET_TLS_DIRECT_SEG_REFS)
22289 struct ix86_address parts;
22290 int ok = ix86_decompose_address (addr, &parts);
22291 gcc_assert (ok);
22292 if (parts.seg == DEFAULT_TLS_SEG_REG)
22294 /* It is not valid to use %gs: or %fs: in
22295 lea though, so we need to remove it from the
22296 address used for lea and add it to each individual
22297 memory load instead. */
22298 addr = copy_rtx (addr);
22299 rtx *x = &addr;
22300 while (GET_CODE (*x) == PLUS)
22302 for (i = 0; i < 2; i++)
22304 rtx u = XEXP (*x, i);
22305 if (GET_CODE (u) == ZERO_EXTEND)
22306 u = XEXP (u, 0);
22307 if (GET_CODE (u) == UNSPEC
22308 && XINT (u, 1) == UNSPEC_TP)
22310 tls_base = XEXP (*x, i);
22311 *x = XEXP (*x, 1 - i);
22312 break;
22315 if (tls_base)
22316 break;
22317 x = &XEXP (*x, 0);
22319 gcc_assert (tls_base);
22322 emit_insn (gen_rtx_SET (VOIDmode, base, addr));
22323 if (tls_base)
22324 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
22325 part[1][0] = replace_equiv_address (part[1][0], base);
22326 for (i = 1; i < nparts; i++)
22328 if (tls_base)
22329 base = copy_rtx (base);
22330 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22331 part[1][i] = replace_equiv_address (part[1][i], tmp);
22336 if (push)
22338 if (!TARGET_64BIT)
22340 if (nparts == 3)
22342 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22343 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22344 stack_pointer_rtx, GEN_INT (-4)));
22345 emit_move_insn (part[0][2], part[1][2]);
22347 else if (nparts == 4)
22349 emit_move_insn (part[0][3], part[1][3]);
22350 emit_move_insn (part[0][2], part[1][2]);
22353 else
22355 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22356 register, that is OK - we will just use the larger counterpart. We also
22357 retype memory - these come from an attempt to avoid a REX prefix on
22358 moving the second half of a TFmode value. */
22359 if (GET_MODE (part[1][1]) == SImode)
22361 switch (GET_CODE (part[1][1]))
22363 case MEM:
22364 part[1][1] = adjust_address (part[1][1], DImode, 0);
22365 break;
22367 case REG:
22368 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22369 break;
22371 default:
22372 gcc_unreachable ();
22375 if (GET_MODE (part[1][0]) == SImode)
22376 part[1][0] = part[1][1];
22379 emit_move_insn (part[0][1], part[1][1]);
22380 emit_move_insn (part[0][0], part[1][0]);
22381 return;
22384 /* Choose correct order to not overwrite the source before it is copied. */
22385 if ((REG_P (part[0][0])
22386 && REG_P (part[1][1])
22387 && (REGNO (part[0][0]) == REGNO (part[1][1])
22388 || (nparts == 3
22389 && REGNO (part[0][0]) == REGNO (part[1][2]))
22390 || (nparts == 4
22391 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22392 || (collisions > 0
22393 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22395 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22397 operands[2 + i] = part[0][j];
22398 operands[6 + i] = part[1][j];
22401 else
22403 for (i = 0; i < nparts; i++)
22405 operands[2 + i] = part[0][i];
22406 operands[6 + i] = part[1][i];
22410 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22411 if (optimize_insn_for_size_p ())
22413 for (j = 0; j < nparts - 1; j++)
22414 if (CONST_INT_P (operands[6 + j])
22415 && operands[6 + j] != const0_rtx
22416 && REG_P (operands[2 + j]))
22417 for (i = j; i < nparts - 1; i++)
22418 if (CONST_INT_P (operands[7 + i])
22419 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22420 operands[7 + i] = operands[2 + j];
22423 for (i = 0; i < nparts; i++)
22424 emit_move_insn (operands[2 + i], operands[6 + i]);
22426 return;
22429 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22430 left shift by a constant, either using a single shift or
22431 a sequence of add instructions. */
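/* For example, a left shift by 2 may become two 'add reg,reg' instructions
   when twice the add cost does not exceed the constant-shift cost and we are
   not optimizing for size; otherwise a single shift instruction is used.  */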
22433 static void
22434 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22436 rtx (*insn)(rtx, rtx, rtx);
22438 if (count == 1
22439 || (count * ix86_cost->add <= ix86_cost->shift_const
22440 && !optimize_insn_for_size_p ()))
22442 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22443 while (count-- > 0)
22444 emit_insn (insn (operand, operand, operand));
22446 else
22448 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22449 emit_insn (insn (operand, operand, GEN_INT (count)));
22453 void
22454 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22456 rtx (*gen_ashl3)(rtx, rtx, rtx);
22457 rtx (*gen_shld)(rtx, rtx, rtx);
22458 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22460 rtx low[2], high[2];
22461 int count;
22463 if (CONST_INT_P (operands[2]))
22465 split_double_mode (mode, operands, 2, low, high);
22466 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22468 if (count >= half_width)
22470 emit_move_insn (high[0], low[1]);
22471 emit_move_insn (low[0], const0_rtx);
22473 if (count > half_width)
22474 ix86_expand_ashl_const (high[0], count - half_width, mode);
22476 else
22478 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22480 if (!rtx_equal_p (operands[0], operands[1]))
22481 emit_move_insn (operands[0], operands[1]);
22483 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22484 ix86_expand_ashl_const (low[0], count, mode);
22486 return;
22489 split_double_mode (mode, operands, 1, low, high);
22491 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22493 if (operands[1] == const1_rtx)
22495 /* Assuming we've chosen QImode-capable registers, 1 << N
22496 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22497 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22499 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22501 ix86_expand_clear (low[0]);
22502 ix86_expand_clear (high[0]);
22503 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22505 d = gen_lowpart (QImode, low[0]);
22506 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22507 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22508 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22510 d = gen_lowpart (QImode, high[0]);
22511 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22512 s = gen_rtx_NE (QImode, flags, const0_rtx);
22513 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22516 /* Otherwise, we can get the same results by manually performing
22517 a bit extract operation on bit 5/6, and then performing the two
22518 shifts. The two methods of getting 0/1 into low/high are exactly
22519 the same size. Avoiding the shift in the bit extract case helps
22520 pentium4 a bit; no one else seems to care much either way. */
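      /* E.g. for DImode shifts (BITS == 5 below), HIGH becomes
	 (count >> 5) & 1 and LOW its complement, so exactly one of them is 1;
	 the final pair of shifts by the count then moves that 1 into the
	 requested bit position.  */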
22521 else
22523 enum machine_mode half_mode;
22524 rtx (*gen_lshr3)(rtx, rtx, rtx);
22525 rtx (*gen_and3)(rtx, rtx, rtx);
22526 rtx (*gen_xor3)(rtx, rtx, rtx);
22527 HOST_WIDE_INT bits;
22528 rtx x;
22530 if (mode == DImode)
22532 half_mode = SImode;
22533 gen_lshr3 = gen_lshrsi3;
22534 gen_and3 = gen_andsi3;
22535 gen_xor3 = gen_xorsi3;
22536 bits = 5;
22538 else
22540 half_mode = DImode;
22541 gen_lshr3 = gen_lshrdi3;
22542 gen_and3 = gen_anddi3;
22543 gen_xor3 = gen_xordi3;
22544 bits = 6;
22547 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22548 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22549 else
22550 x = gen_lowpart (half_mode, operands[2]);
22551 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22553 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22554 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22555 emit_move_insn (low[0], high[0]);
22556 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22559 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22560 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22561 return;
22564 if (operands[1] == constm1_rtx)
22566 /* For -1 << N, we can avoid the shld instruction, because we
22567 know that we're shifting 0...31/63 ones into a -1. */
22568 emit_move_insn (low[0], constm1_rtx);
22569 if (optimize_insn_for_size_p ())
22570 emit_move_insn (high[0], low[0]);
22571 else
22572 emit_move_insn (high[0], constm1_rtx);
22574 else
22576 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22578 if (!rtx_equal_p (operands[0], operands[1]))
22579 emit_move_insn (operands[0], operands[1]);
22581 split_double_mode (mode, operands, 1, low, high);
22582 emit_insn (gen_shld (high[0], low[0], operands[2]));
22585 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22587 if (TARGET_CMOVE && scratch)
22589 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22590 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22592 ix86_expand_clear (scratch);
22593 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22595 else
22597 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22598 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22600 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22604 void
22605 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22607 rtx (*gen_ashr3)(rtx, rtx, rtx)
22608 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22609 rtx (*gen_shrd)(rtx, rtx, rtx);
22610 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22612 rtx low[2], high[2];
22613 int count;
22615 if (CONST_INT_P (operands[2]))
22617 split_double_mode (mode, operands, 2, low, high);
22618 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22620 if (count == GET_MODE_BITSIZE (mode) - 1)
22622 emit_move_insn (high[0], high[1]);
22623 emit_insn (gen_ashr3 (high[0], high[0],
22624 GEN_INT (half_width - 1)));
22625 emit_move_insn (low[0], high[0]);
22628 else if (count >= half_width)
22630 emit_move_insn (low[0], high[1]);
22631 emit_move_insn (high[0], low[0]);
22632 emit_insn (gen_ashr3 (high[0], high[0],
22633 GEN_INT (half_width - 1)));
22635 if (count > half_width)
22636 emit_insn (gen_ashr3 (low[0], low[0],
22637 GEN_INT (count - half_width)));
22639 else
22641 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22643 if (!rtx_equal_p (operands[0], operands[1]))
22644 emit_move_insn (operands[0], operands[1]);
22646 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22647 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22650 else
22652 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22654 if (!rtx_equal_p (operands[0], operands[1]))
22655 emit_move_insn (operands[0], operands[1]);
22657 split_double_mode (mode, operands, 1, low, high);
22659 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22660 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22662 if (TARGET_CMOVE && scratch)
22664 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22665 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22667 emit_move_insn (scratch, high[0]);
22668 emit_insn (gen_ashr3 (scratch, scratch,
22669 GEN_INT (half_width - 1)));
22670 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22671 scratch));
22673 else
22675 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22676 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22678 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22683 void
22684 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22686 rtx (*gen_lshr3)(rtx, rtx, rtx)
22687 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22688 rtx (*gen_shrd)(rtx, rtx, rtx);
22689 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22691 rtx low[2], high[2];
22692 int count;
22694 if (CONST_INT_P (operands[2]))
22696 split_double_mode (mode, operands, 2, low, high);
22697 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22699 if (count >= half_width)
22701 emit_move_insn (low[0], high[1]);
22702 ix86_expand_clear (high[0]);
22704 if (count > half_width)
22705 emit_insn (gen_lshr3 (low[0], low[0],
22706 GEN_INT (count - half_width)));
22708 else
22710 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22712 if (!rtx_equal_p (operands[0], operands[1]))
22713 emit_move_insn (operands[0], operands[1]);
22715 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22716 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22719 else
22721 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22723 if (!rtx_equal_p (operands[0], operands[1]))
22724 emit_move_insn (operands[0], operands[1]);
22726 split_double_mode (mode, operands, 1, low, high);
22728 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22729 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22731 if (TARGET_CMOVE && scratch)
22733 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22734 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22736 ix86_expand_clear (scratch);
22737 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22738 scratch));
22740 else
22742 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22743 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22745 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22750 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22751 static void
22752 predict_jump (int prob)
22754 rtx insn = get_last_insn ();
22755 gcc_assert (JUMP_P (insn));
22756 add_int_reg_note (insn, REG_BR_PROB, prob);
22759 /* Helper function for the string operations below. Test whether VARIABLE
22760 is aligned to VALUE bytes. If true, jump to the label. */
22761 static rtx
22762 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22764 rtx label = gen_label_rtx ();
22765 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22766 if (GET_MODE (variable) == DImode)
22767 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22768 else
22769 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22770 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22771 1, label);
22772 if (epilogue)
22773 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22774 else
22775 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22776 return label;
22779 /* Adjust COUNTREG by VALUE. */
22780 static void
22781 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22783 rtx (*gen_add)(rtx, rtx, rtx)
22784 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22786 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22789 /* Zero extend possibly SImode EXP to Pmode register. */
22791 ix86_zero_extend_to_Pmode (rtx exp)
22793 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22796 /* Divide COUNTREG by SCALE. */
22797 static rtx
22798 scale_counter (rtx countreg, int scale)
22800 rtx sc;
22802 if (scale == 1)
22803 return countreg;
22804 if (CONST_INT_P (countreg))
22805 return GEN_INT (INTVAL (countreg) / scale);
22806 gcc_assert (REG_P (countreg));
22808 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22809 GEN_INT (exact_log2 (scale)),
22810 NULL, 1, OPTAB_DIRECT);
22811 return sc;
22814 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22815 DImode for constant loop counts. */
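/* For instance, a constant count of 0x100 stays in SImode even with -m64,
   a 64-bit constant that does not fit in 32 bits gets DImode, and a
   non-constant count simply uses Pmode. */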
22817 static enum machine_mode
22818 counter_mode (rtx count_exp)
22820 if (GET_MODE (count_exp) != VOIDmode)
22821 return GET_MODE (count_exp);
22822 if (!CONST_INT_P (count_exp))
22823 return Pmode;
22824 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22825 return DImode;
22826 return SImode;
22829 /* Copy the address to a Pmode register. This is used for x32 to
22830 truncate a DImode TLS address to an SImode register. */
22832 static rtx
22833 ix86_copy_addr_to_reg (rtx addr)
22835 rtx reg;
22836 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22838 reg = copy_addr_to_reg (addr);
22839 REG_POINTER (reg) = 1;
22840 return reg;
22842 else
22844 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22845 reg = copy_to_mode_reg (DImode, addr);
22846 REG_POINTER (reg) = 1;
22847 return gen_rtx_SUBREG (SImode, reg, 0);
22851 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22852 SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall size
22853 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22854 loop to set memory to VALUE (supposed to be in MODE).
22856 The size is rounded down to a whole number of chunks moved at once.
22857 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
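/* A minimal illustration: with MODE == SImode and UNROLL == 4 each iteration
   handles 16 bytes, so a COUNT of 70 runs the loop body four times (64 bytes)
   and leaves the remaining 6 bytes to the caller's epilogue code. */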
22860 static void
22861 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22862 rtx destptr, rtx srcptr, rtx value,
22863 rtx count, enum machine_mode mode, int unroll,
22864 int expected_size, bool issetmem)
22866 rtx out_label, top_label, iter, tmp;
22867 enum machine_mode iter_mode = counter_mode (count);
22868 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22869 rtx piece_size = GEN_INT (piece_size_n);
22870 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22871 rtx size;
22872 int i;
22874 top_label = gen_label_rtx ();
22875 out_label = gen_label_rtx ();
22876 iter = gen_reg_rtx (iter_mode);
22878 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22879 NULL, 1, OPTAB_DIRECT);
22880 /* Those two should combine. */
22881 if (piece_size == const1_rtx)
22883 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22884 true, out_label);
22885 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22887 emit_move_insn (iter, const0_rtx);
22889 emit_label (top_label);
22891 tmp = convert_modes (Pmode, iter_mode, iter, true);
22893 /* This assert could be relaxed - in that case we'd need to compute the
22894 smallest power of two containing PIECE_SIZE_N and pass it to
22895 offset_address. */
22896 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22897 destmem = offset_address (destmem, tmp, piece_size_n);
22898 destmem = adjust_address (destmem, mode, 0);
22900 if (!issetmem)
22902 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22903 srcmem = adjust_address (srcmem, mode, 0);
22905 /* When unrolling for chips that reorder memory reads and writes,
22906 we can save registers by using a single temporary.
22907 Using 4 temporaries is also overkill in 32-bit mode. */
22908 if (!TARGET_64BIT && 0)
22910 for (i = 0; i < unroll; i++)
22912 if (i)
22914 destmem =
22915 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22916 srcmem =
22917 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22919 emit_move_insn (destmem, srcmem);
22922 else
22924 rtx tmpreg[4];
22925 gcc_assert (unroll <= 4);
22926 for (i = 0; i < unroll; i++)
22928 tmpreg[i] = gen_reg_rtx (mode);
22929 if (i)
22931 srcmem =
22932 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22934 emit_move_insn (tmpreg[i], srcmem);
22936 for (i = 0; i < unroll; i++)
22938 if (i)
22940 destmem =
22941 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22943 emit_move_insn (destmem, tmpreg[i]);
22947 else
22948 for (i = 0; i < unroll; i++)
22950 if (i)
22951 destmem =
22952 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22953 emit_move_insn (destmem, value);
22956 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22957 true, OPTAB_LIB_WIDEN);
22958 if (tmp != iter)
22959 emit_move_insn (iter, tmp);
22961 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22962 true, top_label);
22963 if (expected_size != -1)
22965 expected_size /= GET_MODE_SIZE (mode) * unroll;
22966 if (expected_size == 0)
22967 predict_jump (0);
22968 else if (expected_size > REG_BR_PROB_BASE)
22969 predict_jump (REG_BR_PROB_BASE - 1);
22970 else
22971 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22973 else
22974 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22975 iter = ix86_zero_extend_to_Pmode (iter);
22976 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22977 true, OPTAB_LIB_WIDEN);
22978 if (tmp != destptr)
22979 emit_move_insn (destptr, tmp);
22980 if (!issetmem)
22982 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22983 true, OPTAB_LIB_WIDEN);
22984 if (tmp != srcptr)
22985 emit_move_insn (srcptr, tmp);
22987 emit_label (out_label);
22990 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM argument.
22991 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22992 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22993 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22994 ORIG_VALUE is the original value passed to memset to fill the memory with.
22995 Other arguments have the same meaning as for the previous function. */
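/* Rough example (the exact instruction depends on the chosen mode): a memcpy
   of a constant 256 bytes is widened from QImode to SImode, the count
   register is loaded with 256 / 4 == 64, and a single "rep; mov" of SImode
   chunks is emitted. */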
22997 static void
22998 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22999 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23000 rtx count,
23001 enum machine_mode mode, bool issetmem)
23003 rtx destexp;
23004 rtx srcexp;
23005 rtx countreg;
23006 HOST_WIDE_INT rounded_count;
23008 /* If possible, it is shorter to use rep movs.
23009 TODO: Maybe it is better to move this logic to decide_alg. */
23010 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23011 && (!issetmem || orig_value == const0_rtx))
23012 mode = SImode;
23014 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23015 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23017 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23018 GET_MODE_SIZE (mode)));
23019 if (mode != QImode)
23021 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23022 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23023 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23025 else
23026 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23027 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23029 rounded_count = (INTVAL (count)
23030 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23031 destmem = shallow_copy_rtx (destmem);
23032 set_mem_size (destmem, rounded_count);
23034 else if (MEM_SIZE_KNOWN_P (destmem))
23035 clear_mem_size (destmem);
23037 if (issetmem)
23039 value = force_reg (mode, gen_lowpart (mode, value));
23040 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23042 else
23044 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23045 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23046 if (mode != QImode)
23048 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23049 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23050 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23052 else
23053 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23054 if (CONST_INT_P (count))
23056 rounded_count = (INTVAL (count)
23057 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23058 srcmem = shallow_copy_rtx (srcmem);
23059 set_mem_size (srcmem, rounded_count);
23061 else
23063 if (MEM_SIZE_KNOWN_P (srcmem))
23064 clear_mem_size (srcmem);
23066 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23067 destexp, srcexp));
23071 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23072 DESTMEM.
23073 SRCMEM is passed by pointer so it can be updated on return.
23074 The return value is the updated DESTMEM. */
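/* For example, SIZE_TO_MOVE == 8 on a 64-bit target becomes one DImode load
   into a temporary register followed by one DImode store, after which both
   DESTPTR and SRCPTR are advanced by 8. */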
23075 static rtx
23076 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23077 HOST_WIDE_INT size_to_move)
23079 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23080 enum insn_code code;
23081 enum machine_mode move_mode;
23082 int piece_size, i;
23084 /* Find the widest mode in which we could perform moves.
23085 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23086 it until a move of that size is supported. */
23087 piece_size = 1 << floor_log2 (size_to_move);
23088 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23089 code = optab_handler (mov_optab, move_mode);
23090 while (code == CODE_FOR_nothing && piece_size > 1)
23092 piece_size >>= 1;
23093 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23094 code = optab_handler (mov_optab, move_mode);
23097 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23098 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23099 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23101 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23102 move_mode = mode_for_vector (word_mode, nunits);
23103 code = optab_handler (mov_optab, move_mode);
23104 if (code == CODE_FOR_nothing)
23106 move_mode = word_mode;
23107 piece_size = GET_MODE_SIZE (move_mode);
23108 code = optab_handler (mov_optab, move_mode);
23111 gcc_assert (code != CODE_FOR_nothing);
23113 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23114 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23116 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23117 gcc_assert (size_to_move % piece_size == 0);
23118 adjust = GEN_INT (piece_size);
23119 for (i = 0; i < size_to_move; i += piece_size)
23121 /* We move from memory to memory, so we'll need to do it via
23122 a temporary register. */
23123 tempreg = gen_reg_rtx (move_mode);
23124 emit_insn (GEN_FCN (code) (tempreg, src));
23125 emit_insn (GEN_FCN (code) (dst, tempreg));
23127 emit_move_insn (destptr,
23128 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23129 emit_move_insn (srcptr,
23130 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23132 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23133 piece_size);
23134 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23135 piece_size);
23138 /* Update DST and SRC rtx. */
23139 *srcmem = src;
23140 return dst;
23143 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
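/* For instance, with MAX_SIZE == 16 and a constant COUNT of 23 the epilogue
   has to copy 23 % 16 == 7 bytes, which is emitted as a 4-byte, a 2-byte and
   a 1-byte move. */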
23144 static void
23145 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23146 rtx destptr, rtx srcptr, rtx count, int max_size)
23148 rtx src, dest;
23149 if (CONST_INT_P (count))
23151 HOST_WIDE_INT countval = INTVAL (count);
23152 HOST_WIDE_INT epilogue_size = countval % max_size;
23153 int i;
23155 /* For now MAX_SIZE should be a power of 2. This assert could be
23156 relaxed, but it would require a somewhat more complicated epilogue
23157 expansion. */
23158 gcc_assert ((max_size & (max_size - 1)) == 0);
23159 for (i = max_size; i >= 1; i >>= 1)
23161 if (epilogue_size & i)
23162 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23164 return;
23166 if (max_size > 8)
23168 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23169 count, 1, OPTAB_DIRECT);
23170 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23171 count, QImode, 1, 4, false);
23172 return;
23175 /* When single stringops are available, we can cheaply increase dest and src
23176 pointers. Otherwise we save code size by maintaining an offset (zero is
23177 readily available from the preceding rep operation) and using x86 addressing modes.
23178 */
23179 if (TARGET_SINGLE_STRINGOP)
23181 if (max_size > 4)
23183 rtx label = ix86_expand_aligntest (count, 4, true);
23184 src = change_address (srcmem, SImode, srcptr);
23185 dest = change_address (destmem, SImode, destptr);
23186 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23187 emit_label (label);
23188 LABEL_NUSES (label) = 1;
23190 if (max_size > 2)
23192 rtx label = ix86_expand_aligntest (count, 2, true);
23193 src = change_address (srcmem, HImode, srcptr);
23194 dest = change_address (destmem, HImode, destptr);
23195 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23196 emit_label (label);
23197 LABEL_NUSES (label) = 1;
23199 if (max_size > 1)
23201 rtx label = ix86_expand_aligntest (count, 1, true);
23202 src = change_address (srcmem, QImode, srcptr);
23203 dest = change_address (destmem, QImode, destptr);
23204 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23205 emit_label (label);
23206 LABEL_NUSES (label) = 1;
23209 else
23211 rtx offset = force_reg (Pmode, const0_rtx);
23212 rtx tmp;
23214 if (max_size > 4)
23216 rtx label = ix86_expand_aligntest (count, 4, true);
23217 src = change_address (srcmem, SImode, srcptr);
23218 dest = change_address (destmem, SImode, destptr);
23219 emit_move_insn (dest, src);
23220 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23221 true, OPTAB_LIB_WIDEN);
23222 if (tmp != offset)
23223 emit_move_insn (offset, tmp);
23224 emit_label (label);
23225 LABEL_NUSES (label) = 1;
23227 if (max_size > 2)
23229 rtx label = ix86_expand_aligntest (count, 2, true);
23230 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23231 src = change_address (srcmem, HImode, tmp);
23232 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23233 dest = change_address (destmem, HImode, tmp);
23234 emit_move_insn (dest, src);
23235 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23236 true, OPTAB_LIB_WIDEN);
23237 if (tmp != offset)
23238 emit_move_insn (offset, tmp);
23239 emit_label (label);
23240 LABEL_NUSES (label) = 1;
23242 if (max_size > 1)
23244 rtx label = ix86_expand_aligntest (count, 1, true);
23245 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23246 src = change_address (srcmem, QImode, tmp);
23247 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23248 dest = change_address (destmem, QImode, tmp);
23249 emit_move_insn (dest, src);
23250 emit_label (label);
23251 LABEL_NUSES (label) = 1;
23256 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
23257 with value PROMOTED_VAL.
23258 Unlike emit_memmov, there is no source operand.
23259 The return value is the updated DESTMEM. */
23260 static rtx
23261 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23262 HOST_WIDE_INT size_to_move)
23264 rtx dst = destmem, adjust;
23265 enum insn_code code;
23266 enum machine_mode move_mode;
23267 int piece_size, i;
23269 /* Find the widest mode in which we could perform moves.
23270 Start with the mode of PROMOTED_VAL and narrow it if SIZE_TO_MOVE is
23271 smaller than that mode's size. */
23272 move_mode = GET_MODE (promoted_val);
23273 if (move_mode == VOIDmode)
23274 move_mode = QImode;
23275 if (size_to_move < GET_MODE_SIZE (move_mode))
23277 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23278 promoted_val = gen_lowpart (move_mode, promoted_val);
23280 piece_size = GET_MODE_SIZE (move_mode);
23281 code = optab_handler (mov_optab, move_mode);
23282 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23284 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23286 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23287 gcc_assert (size_to_move % piece_size == 0);
23288 adjust = GEN_INT (piece_size);
23289 for (i = 0; i < size_to_move; i += piece_size)
23291 if (piece_size <= GET_MODE_SIZE (word_mode))
23293 emit_insn (gen_strset (destptr, dst, promoted_val));
23294 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23295 piece_size);
23296 continue;
23299 emit_insn (GEN_FCN (code) (dst, promoted_val));
23301 emit_move_insn (destptr,
23302 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23304 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23305 piece_size);
23308 /* Update DST rtx. */
23309 return dst;
23311 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23312 static void
23313 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23314 rtx count, int max_size)
23316 count =
23317 expand_simple_binop (counter_mode (count), AND, count,
23318 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23319 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23320 gen_lowpart (QImode, value), count, QImode,
23321 1, max_size / 2, true);
23324 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23325 static void
23326 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23327 rtx count, int max_size)
23329 rtx dest;
23331 if (CONST_INT_P (count))
23333 HOST_WIDE_INT countval = INTVAL (count);
23334 HOST_WIDE_INT epilogue_size = countval % max_size;
23335 int i;
23337 /* For now MAX_SIZE should be a power of 2. This assert could be
23338 relaxed, but it would require a somewhat more complicated epilogue
23339 expansion. */
23340 gcc_assert ((max_size & (max_size - 1)) == 0);
23341 for (i = max_size; i >= 1; i >>= 1)
23343 if (epilogue_size & i)
23345 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23346 destmem = emit_memset (destmem, destptr, vec_value, i);
23347 else
23348 destmem = emit_memset (destmem, destptr, value, i);
23351 return;
23353 if (max_size > 32)
23355 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23356 return;
23358 if (max_size > 16)
23360 rtx label = ix86_expand_aligntest (count, 16, true);
23361 if (TARGET_64BIT)
23363 dest = change_address (destmem, DImode, destptr);
23364 emit_insn (gen_strset (destptr, dest, value));
23365 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23366 emit_insn (gen_strset (destptr, dest, value));
23368 else
23370 dest = change_address (destmem, SImode, destptr);
23371 emit_insn (gen_strset (destptr, dest, value));
23372 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23373 emit_insn (gen_strset (destptr, dest, value));
23374 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23375 emit_insn (gen_strset (destptr, dest, value));
23376 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23377 emit_insn (gen_strset (destptr, dest, value));
23379 emit_label (label);
23380 LABEL_NUSES (label) = 1;
23382 if (max_size > 8)
23384 rtx label = ix86_expand_aligntest (count, 8, true);
23385 if (TARGET_64BIT)
23387 dest = change_address (destmem, DImode, destptr);
23388 emit_insn (gen_strset (destptr, dest, value));
23390 else
23392 dest = change_address (destmem, SImode, destptr);
23393 emit_insn (gen_strset (destptr, dest, value));
23394 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23395 emit_insn (gen_strset (destptr, dest, value));
23397 emit_label (label);
23398 LABEL_NUSES (label) = 1;
23400 if (max_size > 4)
23402 rtx label = ix86_expand_aligntest (count, 4, true);
23403 dest = change_address (destmem, SImode, destptr);
23404 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23405 emit_label (label);
23406 LABEL_NUSES (label) = 1;
23408 if (max_size > 2)
23410 rtx label = ix86_expand_aligntest (count, 2, true);
23411 dest = change_address (destmem, HImode, destptr);
23412 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23413 emit_label (label);
23414 LABEL_NUSES (label) = 1;
23416 if (max_size > 1)
23418 rtx label = ix86_expand_aligntest (count, 1, true);
23419 dest = change_address (destmem, QImode, destptr);
23420 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23421 emit_label (label);
23422 LABEL_NUSES (label) = 1;
23426 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
23427 enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
23428 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23429 ignored.
23430 The return value is the updated DESTMEM. */
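/* For example, raising alignment from ALIGN == 1 to DESIRED_ALIGNMENT == 8
   emits three guarded steps of 1, 2 and 4 bytes; each step tests one low bit
   of DESTPTR and performs the store or copy only when that bit is set,
   adjusting COUNT accordingly. */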
23431 static rtx
23432 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23433 rtx destptr, rtx srcptr, rtx value,
23434 rtx vec_value, rtx count, int align,
23435 int desired_alignment, bool issetmem)
23437 int i;
23438 for (i = 1; i < desired_alignment; i <<= 1)
23440 if (align <= i)
23442 rtx label = ix86_expand_aligntest (destptr, i, false);
23443 if (issetmem)
23445 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23446 destmem = emit_memset (destmem, destptr, vec_value, i);
23447 else
23448 destmem = emit_memset (destmem, destptr, value, i);
23450 else
23451 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23452 ix86_adjust_counter (count, i);
23453 emit_label (label);
23454 LABEL_NUSES (label) = 1;
23455 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23458 return destmem;
23461 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23462 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23463 and jump to DONE_LABEL. */
23464 static void
23465 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23466 rtx destptr, rtx srcptr,
23467 rtx value, rtx vec_value,
23468 rtx count, int size,
23469 rtx done_label, bool issetmem)
23471 rtx label = ix86_expand_aligntest (count, size, false);
23472 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23473 rtx modesize;
23474 int n;
23476 /* If we do not have vector value to copy, we must reduce size. */
23477 if (issetmem)
23479 if (!vec_value)
23481 if (GET_MODE (value) == VOIDmode && size > 8)
23482 mode = Pmode;
23483 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23484 mode = GET_MODE (value);
23486 else
23487 mode = GET_MODE (vec_value), value = vec_value;
23489 else
23491 /* Choose appropriate vector mode. */
23492 if (size >= 32)
23493 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23494 else if (size >= 16)
23495 mode = TARGET_SSE ? V16QImode : DImode;
23496 srcmem = change_address (srcmem, mode, srcptr);
23498 destmem = change_address (destmem, mode, destptr);
23499 modesize = GEN_INT (GET_MODE_SIZE (mode));
23500 gcc_assert (GET_MODE_SIZE (mode) <= size);
23501 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23503 if (issetmem)
23504 emit_move_insn (destmem, gen_lowpart (mode, value));
23505 else
23507 emit_move_insn (destmem, srcmem);
23508 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23510 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23513 destmem = offset_address (destmem, count, 1);
23514 destmem = offset_address (destmem, GEN_INT (-2 * size),
23515 GET_MODE_SIZE (mode));
23516 if (!issetmem)
23518 srcmem = offset_address (srcmem, count, 1);
23519 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23520 GET_MODE_SIZE (mode));
23522 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23524 if (issetmem)
23525 emit_move_insn (destmem, gen_lowpart (mode, value));
23526 else
23528 emit_move_insn (destmem, srcmem);
23529 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23531 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23533 emit_jump_insn (gen_jump (done_label));
23534 emit_barrier ();
23536 emit_label (label);
23537 LABEL_NUSES (label) = 1;
23540 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23541 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23542 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so we can
23543 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23544 DONE_LABEL is a label after the whole copying sequence. The label is created
23545 on demand if *DONE_LABEL is NULL.
23546 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for
23547 new bounds after the initial copies.
23549 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23550 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23551 we will dispatch to a library call for large blocks.
23553 In pseudocode we do:
23555 if (COUNT < SIZE)
23557 Assume that SIZE is 4. Bigger sizes are handled analogously
23558 if (COUNT & 4)
23560 copy 4 bytes from SRCPTR to DESTPTR
23561 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23562 goto done_label
23564 if (!COUNT)
23565 goto done_label;
23566 copy 1 byte from SRCPTR to DESTPTR
23567 if (COUNT & 2)
23569 copy 2 bytes from SRCPTR to DESTPTR
23570 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23573 else
23575 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23576 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23578 OLD_DESTPTR = DESTPTR;
23579 Align DESTPTR up to DESIRED_ALIGN
23580 SRCPTR += DESTPTR - OLD_DESTPTR
23581 COUNT -= DESTPTR - OLD_DESTPTR
23582 if (DYNAMIC_CHECK)
23583 Round COUNT down to multiple of SIZE
23584 << optional caller supplied zero size guard is here >>
23585 << optional caller supplied dynamic check is here >>
23586 << caller supplied main copy loop is here >>
23588 done_label:
23589 */
23590 static void
23591 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23592 rtx *destptr, rtx *srcptr,
23593 enum machine_mode mode,
23594 rtx value, rtx vec_value,
23595 rtx *count,
23596 rtx *done_label,
23597 int size,
23598 int desired_align,
23599 int align,
23600 unsigned HOST_WIDE_INT *min_size,
23601 bool dynamic_check,
23602 bool issetmem)
23604 rtx loop_label = NULL, label;
23605 int n;
23606 rtx modesize;
23607 int prolog_size = 0;
23608 rtx mode_value;
23610 /* Choose the proper value to copy. */
23611 if (issetmem && VECTOR_MODE_P (mode))
23612 mode_value = vec_value;
23613 else
23614 mode_value = value;
23615 gcc_assert (GET_MODE_SIZE (mode) <= size);
23617 /* See if block is big or small, handle small blocks. */
23618 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23620 int size2 = size;
23621 loop_label = gen_label_rtx ();
23623 if (!*done_label)
23624 *done_label = gen_label_rtx ();
23626 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23627 1, loop_label);
23628 size2 >>= 1;
23630 /* Handle sizes > 3. */
23631 for (;size2 > 2; size2 >>= 1)
23632 expand_small_movmem_or_setmem (destmem, srcmem,
23633 *destptr, *srcptr,
23634 value, vec_value,
23635 *count,
23636 size2, *done_label, issetmem);
23637 /* Nothing to copy? Jump to DONE_LABEL if so */
23638 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23639 1, *done_label);
23641 /* Do a byte copy. */
23642 destmem = change_address (destmem, QImode, *destptr);
23643 if (issetmem)
23644 emit_move_insn (destmem, gen_lowpart (QImode, value));
23645 else
23647 srcmem = change_address (srcmem, QImode, *srcptr);
23648 emit_move_insn (destmem, srcmem);
23651 /* Handle sizes 2 and 3. */
23652 label = ix86_expand_aligntest (*count, 2, false);
23653 destmem = change_address (destmem, HImode, *destptr);
23654 destmem = offset_address (destmem, *count, 1);
23655 destmem = offset_address (destmem, GEN_INT (-2), 2);
23656 if (issetmem)
23657 emit_move_insn (destmem, gen_lowpart (HImode, value));
23658 else
23660 srcmem = change_address (srcmem, HImode, *srcptr);
23661 srcmem = offset_address (srcmem, *count, 1);
23662 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23663 emit_move_insn (destmem, srcmem);
23666 emit_label (label);
23667 LABEL_NUSES (label) = 1;
23668 emit_jump_insn (gen_jump (*done_label));
23669 emit_barrier ();
23671 else
23672 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23673 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23675 /* Start memcpy for COUNT >= SIZE. */
23676 if (loop_label)
23678 emit_label (loop_label);
23679 LABEL_NUSES (loop_label) = 1;
23682 /* Copy first desired_align bytes. */
23683 if (!issetmem)
23684 srcmem = change_address (srcmem, mode, *srcptr);
23685 destmem = change_address (destmem, mode, *destptr);
23686 modesize = GEN_INT (GET_MODE_SIZE (mode));
23687 for (n = 0; prolog_size < desired_align - align; n++)
23689 if (issetmem)
23690 emit_move_insn (destmem, mode_value);
23691 else
23693 emit_move_insn (destmem, srcmem);
23694 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23696 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23697 prolog_size += GET_MODE_SIZE (mode);
23701 /* Copy last SIZE bytes. */
23702 destmem = offset_address (destmem, *count, 1);
23703 destmem = offset_address (destmem,
23704 GEN_INT (-size - prolog_size),
23706 if (issetmem)
23707 emit_move_insn (destmem, mode_value);
23708 else
23710 srcmem = offset_address (srcmem, *count, 1);
23711 srcmem = offset_address (srcmem,
23712 GEN_INT (-size - prolog_size),
23714 emit_move_insn (destmem, srcmem);
23716 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23718 destmem = offset_address (destmem, modesize, 1);
23719 if (issetmem)
23720 emit_move_insn (destmem, mode_value);
23721 else
23723 srcmem = offset_address (srcmem, modesize, 1);
23724 emit_move_insn (destmem, srcmem);
23728 /* Align destination. */
23729 if (desired_align > 1 && desired_align > align)
23731 rtx saveddest = *destptr;
23733 gcc_assert (desired_align <= size);
23734 /* Align destptr up, placing it in a new register. */
23735 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23736 GEN_INT (prolog_size),
23737 NULL_RTX, 1, OPTAB_DIRECT);
23738 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
23739 REG_POINTER (*destptr) = 1;
23740 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23741 GEN_INT (-desired_align),
23742 *destptr, 1, OPTAB_DIRECT);
23743 /* See how many bytes we skipped. */
23744 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23745 *destptr,
23746 saveddest, 1, OPTAB_DIRECT);
23747 /* Adjust srcptr and count. */
23748 if (!issetmem)
23749 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
23750 saveddest, *srcptr, 1, OPTAB_DIRECT);
23751 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23752 saveddest, *count, 1, OPTAB_DIRECT);
23753 /* We copied at most size + prolog_size. */
23754 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23755 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23756 else
23757 *min_size = 0;
23759 /* Our loops always round down the block size, but for dispatch to a library
23760 call we need the precise value. */
23761 if (dynamic_check)
23762 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23763 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23765 else
23767 gcc_assert (prolog_size == 0);
23768 /* Decrease count, so we won't end up copying last word twice. */
23769 if (!CONST_INT_P (*count))
23770 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23771 constm1_rtx, *count, 1, OPTAB_DIRECT);
23772 else
23773 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23774 if (*min_size)
23775 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23780 /* This function is like the previous one, except here we know how many bytes
23781 need to be copied. That allows us to update alignment not only of DST, which
23782 is returned, but also of SRC, which is passed as a pointer for that
23783 reason. */
23784 static rtx
23785 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23786 rtx srcreg, rtx value, rtx vec_value,
23787 int desired_align, int align_bytes,
23788 bool issetmem)
23790 rtx src = NULL;
23791 rtx orig_dst = dst;
23792 rtx orig_src = NULL;
23793 int piece_size = 1;
23794 int copied_bytes = 0;
23796 if (!issetmem)
23798 gcc_assert (srcp != NULL);
23799 src = *srcp;
23800 orig_src = src;
23803 for (piece_size = 1;
23804 piece_size <= desired_align && copied_bytes < align_bytes;
23805 piece_size <<= 1)
23807 if (align_bytes & piece_size)
23809 if (issetmem)
23811 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23812 dst = emit_memset (dst, destreg, vec_value, piece_size);
23813 else
23814 dst = emit_memset (dst, destreg, value, piece_size);
23816 else
23817 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23818 copied_bytes += piece_size;
23821 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23822 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23823 if (MEM_SIZE_KNOWN_P (orig_dst))
23824 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23826 if (!issetmem)
23828 int src_align_bytes = get_mem_align_offset (src, desired_align
23829 * BITS_PER_UNIT);
23830 if (src_align_bytes >= 0)
23831 src_align_bytes = desired_align - src_align_bytes;
23832 if (src_align_bytes >= 0)
23834 unsigned int src_align;
23835 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23837 if ((src_align_bytes & (src_align - 1))
23838 == (align_bytes & (src_align - 1)))
23839 break;
23841 if (src_align > (unsigned int) desired_align)
23842 src_align = desired_align;
23843 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23844 set_mem_align (src, src_align * BITS_PER_UNIT);
23846 if (MEM_SIZE_KNOWN_P (orig_src))
23847 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23848 *srcp = src;
23851 return dst;
23854 /* Return true if ALG can be used in current context.
23855 Assume we expand memset if MEMSET is true. */
23856 static bool
23857 alg_usable_p (enum stringop_alg alg, bool memset)
23859 if (alg == no_stringop)
23860 return false;
23861 if (alg == vector_loop)
23862 return TARGET_SSE || TARGET_AVX;
23863 /* Algorithms using the rep prefix want at least edi and ecx;
23864 additionally, memset wants eax and memcpy wants esi. Don't
23865 consider such algorithms if the user has appropriated those
23866 registers for their own purposes. */
23867 if (alg == rep_prefix_1_byte
23868 || alg == rep_prefix_4_byte
23869 || alg == rep_prefix_8_byte)
23870 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23871 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23872 return true;
23875 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
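/* Illustrative outcomes (the final choice also depends on alg_usable_p and
   the per-CPU cost tables): when optimizing for size, a known count that is
   a multiple of 4 prefers rep_prefix_4_byte with *NOALIGN set while other
   counts fall back to rep_prefix_1_byte; when optimizing for speed, a tiny
   expected size (less than 4) picks loop_1_byte. */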
23876 static enum stringop_alg
23877 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23878 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23879 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23881 const struct stringop_algs * algs;
23882 bool optimize_for_speed;
23883 int max = 0;
23884 const struct processor_costs *cost;
23885 int i;
23886 bool any_alg_usable_p = false;
23888 *noalign = false;
23889 *dynamic_check = -1;
23891 /* Even if the string operation call is cold, we still might spend a lot
23892 of time processing large blocks. */
23893 if (optimize_function_for_size_p (cfun)
23894 || (optimize_insn_for_size_p ()
23895 && (max_size < 256
23896 || (expected_size != -1 && expected_size < 256))))
23897 optimize_for_speed = false;
23898 else
23899 optimize_for_speed = true;
23901 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23902 if (memset)
23903 algs = &cost->memset[TARGET_64BIT != 0];
23904 else
23905 algs = &cost->memcpy[TARGET_64BIT != 0];
23907 /* See maximal size for user defined algorithm. */
23908 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23910 enum stringop_alg candidate = algs->size[i].alg;
23911 bool usable = alg_usable_p (candidate, memset);
23912 any_alg_usable_p |= usable;
23914 if (candidate != libcall && candidate && usable)
23915 max = algs->size[i].max;
23918 /* If the expected size is not known but the max size is small enough
23919 that the inline version is a win, set the expected size into
23920 the range. */
23921 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23922 && expected_size == -1)
23923 expected_size = min_size / 2 + max_size / 2;
23925 /* If the user specified the algorithm, honor it if possible. */
23926 if (ix86_stringop_alg != no_stringop
23927 && alg_usable_p (ix86_stringop_alg, memset))
23928 return ix86_stringop_alg;
23929 /* rep; movq or rep; movl is the smallest variant. */
23930 else if (!optimize_for_speed)
23932 *noalign = true;
23933 if (!count || (count & 3) || (memset && !zero_memset))
23934 return alg_usable_p (rep_prefix_1_byte, memset)
23935 ? rep_prefix_1_byte : loop_1_byte;
23936 else
23937 return alg_usable_p (rep_prefix_4_byte, memset)
23938 ? rep_prefix_4_byte : loop;
23940 /* Very tiny blocks are best handled via the loop; REP is expensive to
23941 set up. */
23942 else if (expected_size != -1 && expected_size < 4)
23943 return loop_1_byte;
23944 else if (expected_size != -1)
23946 enum stringop_alg alg = libcall;
23947 bool alg_noalign = false;
23948 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23950 /* We get here if the algorithms that were not libcall-based
23951 were rep-prefix based and we are unable to use rep prefixes
23952 based on global register usage. Break out of the loop and
23953 use the heuristic below. */
23954 if (algs->size[i].max == 0)
23955 break;
23956 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23958 enum stringop_alg candidate = algs->size[i].alg;
23960 if (candidate != libcall && alg_usable_p (candidate, memset))
23962 alg = candidate;
23963 alg_noalign = algs->size[i].noalign;
23965 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23966 last non-libcall inline algorithm. */
23967 if (TARGET_INLINE_ALL_STRINGOPS)
23969 /* When the current size is best to be copied by a libcall,
23970 but we are still forced to inline, run the heuristic below
23971 that will pick code for medium sized blocks. */
23972 if (alg != libcall)
23974 *noalign = alg_noalign;
23975 return alg;
23977 else if (!any_alg_usable_p)
23978 break;
23980 else if (alg_usable_p (candidate, memset))
23982 *noalign = algs->size[i].noalign;
23983 return candidate;
23988 /* When asked to inline the call anyway, try to pick a meaningful choice.
23989 We look for the maximal size of block that is faster to copy by hand and
23990 take blocks of at most that size, guessing that the average size will
23991 be roughly half of the block.
23993 If this turns out to be bad, we might simply specify the preferred
23994 choice in ix86_costs. */
23995 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23996 && (algs->unknown_size == libcall
23997 || !alg_usable_p (algs->unknown_size, memset)))
23999 enum stringop_alg alg;
24001 /* If there aren't any usable algorithms, then recursing on
24002 smaller sizes isn't going to find anything. Just return the
24003 simple byte-at-a-time copy loop. */
24004 if (!any_alg_usable_p)
24006 /* Pick something reasonable. */
24007 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24008 *dynamic_check = 128;
24009 return loop_1_byte;
24011 if (max <= 0)
24012 max = 4096;
24013 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24014 zero_memset, dynamic_check, noalign);
24015 gcc_assert (*dynamic_check == -1);
24016 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24017 *dynamic_check = max;
24018 else
24019 gcc_assert (alg != libcall);
24020 return alg;
24022 return (alg_usable_p (algs->unknown_size, memset)
24023 ? algs->unknown_size : libcall);
24026 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24027 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
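/* For example, a rep_prefix_8_byte copy with DImode MOVE_MODE normally asks
   for 8-byte destination alignment, but when optimizing for size, or when
   the expected block is tiny (less than 4 bytes), the incoming ALIGN is
   simply kept. */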
24028 static int
24029 decide_alignment (int align,
24030 enum stringop_alg alg,
24031 int expected_size,
24032 enum machine_mode move_mode)
24034 int desired_align = 0;
24036 gcc_assert (alg != no_stringop);
24038 if (alg == libcall)
24039 return 0;
24040 if (move_mode == VOIDmode)
24041 return 0;
24043 desired_align = GET_MODE_SIZE (move_mode);
24044 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24045 copying a whole cache line at once. */
24046 if (TARGET_PENTIUMPRO
24047 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24048 desired_align = 8;
24050 if (optimize_size)
24051 desired_align = 1;
24052 if (desired_align < align)
24053 desired_align = align;
24054 if (expected_size != -1 && expected_size < 4)
24055 desired_align = align;
24057 return desired_align;
24061 /* Helper function for memset. For the QImode value 0xXY produce
24062 0xXYXYXYXY of the width specified by MODE. This is essentially
24063 a * 0x01010101, but we can do slightly better than
24064 synth_mult by unwinding the sequence by hand on CPUs with
24065 slow multiply. */
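/* For instance, promoting the QImode value 0xAB to SImode yields 0xABABABAB;
   when the cost tables say the multiply is cheap enough this is a single
   multiplication by the promoted constant 0x01010101, otherwise the
   shift-and-or sequence below is used. */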
24066 static rtx
24067 promote_duplicated_reg (enum machine_mode mode, rtx val)
24069 enum machine_mode valmode = GET_MODE (val);
24070 rtx tmp;
24071 int nops = mode == DImode ? 3 : 2;
24073 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24074 if (val == const0_rtx)
24075 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24076 if (CONST_INT_P (val))
24078 HOST_WIDE_INT v = INTVAL (val) & 255;
24080 v |= v << 8;
24081 v |= v << 16;
24082 if (mode == DImode)
24083 v |= (v << 16) << 16;
24084 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24087 if (valmode == VOIDmode)
24088 valmode = QImode;
24089 if (valmode != QImode)
24090 val = gen_lowpart (QImode, val);
24091 if (mode == QImode)
24092 return val;
24093 if (!TARGET_PARTIAL_REG_STALL)
24094 nops--;
24095 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24096 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24097 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24098 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24100 rtx reg = convert_modes (mode, QImode, val, true);
24101 tmp = promote_duplicated_reg (mode, const1_rtx);
24102 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24103 OPTAB_DIRECT);
24105 else
24107 rtx reg = convert_modes (mode, QImode, val, true);
24109 if (!TARGET_PARTIAL_REG_STALL)
24110 if (mode == SImode)
24111 emit_insn (gen_movsi_insv_1 (reg, reg));
24112 else
24113 emit_insn (gen_movdi_insv_1 (reg, reg));
24114 else
24116 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24117 NULL, 1, OPTAB_DIRECT);
24118 reg =
24119 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24121 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24122 NULL, 1, OPTAB_DIRECT);
24123 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24124 if (mode == SImode)
24125 return reg;
24126 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24127 NULL, 1, OPTAB_DIRECT);
24128 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24129 return reg;
24133 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24134 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24135 raising alignment from ALIGN to DESIRED_ALIGN. */
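/* For example, SIZE_NEEDED == 8 on a 64-bit target duplicates VAL into a
   DImode register, while a 2-byte loop that only needs 2-byte alignment gets
   away with an HImode duplicate. */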
24136 static rtx
24137 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24138 int align)
24140 rtx promoted_val;
24142 if (TARGET_64BIT
24143 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24144 promoted_val = promote_duplicated_reg (DImode, val);
24145 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24146 promoted_val = promote_duplicated_reg (SImode, val);
24147 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24148 promoted_val = promote_duplicated_reg (HImode, val);
24149 else
24150 promoted_val = val;
24152 return promoted_val;
24155 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24156 operations when profitable. The code depends upon architecture, block size
24157 and alignment, but always has one of the following overall structures:
24159 Aligned move sequence:
24161 1) Prologue guard: Conditional that jumps up to epilogues for small
24162 blocks that can be handled by the epilogue alone. This is faster
24163 but also needed for correctness, since the prologue assumes the block
24164 is larger than the desired alignment.
24166 Optional dynamic check for size and libcall for large
24167 blocks is emitted here too, with -minline-stringops-dynamically.
24169 2) Prologue: copy first few bytes in order to get destination
24170 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24171 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24172 copied. We emit either a jump tree on power of two sized
24173 blocks, or a byte loop.
24175 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24176 with specified algorithm.
24178 4) Epilogue: code copying tail of the block that is too small to be
24179 handled by main body (or up to size guarded by prologue guard).
24181 Misaligned move sequence
24183 1) misaligned move prologue/epilogue containing:
24184 a) Prologue handling small memory blocks and jumping to done_label
24185 (skipped if blocks are known to be large enough)
24186 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment
24187 is needed, done by a single possibly misaligned move
24188 (skipped if alignment is not needed)
24189 c) Copy of the last SIZE_NEEDED bytes by possibly misaligned moves
24191 2) Zero size guard dispatching to done_label, if needed
24193 3) Dispatch to a library call, if needed
24195 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24196 with specified algorithm. */
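/* As a rough, simplified illustration: a memset of unknown size that ends up
   with the rep_prefix_8_byte algorithm broadcasts the fill value into a wide
   register, jumps small blocks straight to the epilogue, aligns the
   destination in the prologue, issues "rep; stos" for the main body, and
   stores the remaining tail bytes in the epilogue. */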
24197 bool
24198 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24199 rtx align_exp, rtx expected_align_exp,
24200 rtx expected_size_exp, rtx min_size_exp,
24201 rtx max_size_exp, rtx probable_max_size_exp,
24202 bool issetmem)
24204 rtx destreg;
24205 rtx srcreg = NULL;
24206 rtx label = NULL;
24207 rtx tmp;
24208 rtx jump_around_label = NULL;
24209 HOST_WIDE_INT align = 1;
24210 unsigned HOST_WIDE_INT count = 0;
24211 HOST_WIDE_INT expected_size = -1;
24212 int size_needed = 0, epilogue_size_needed;
24213 int desired_align = 0, align_bytes = 0;
24214 enum stringop_alg alg;
24215 rtx promoted_val = NULL;
24216 rtx vec_promoted_val = NULL;
24217 bool force_loopy_epilogue = false;
24218 int dynamic_check;
24219 bool need_zero_guard = false;
24220 bool noalign;
24221 enum machine_mode move_mode = VOIDmode;
24222 int unroll_factor = 1;
24223 /* TODO: Once value ranges are available, fill in proper data. */
24224 unsigned HOST_WIDE_INT min_size = 0;
24225 unsigned HOST_WIDE_INT max_size = -1;
24226 unsigned HOST_WIDE_INT probable_max_size = -1;
24227 bool misaligned_prologue_used = false;
24229 if (CONST_INT_P (align_exp))
24230 align = INTVAL (align_exp);
24231 /* i386 can do misaligned accesses at a reasonably increased cost. */
24232 if (CONST_INT_P (expected_align_exp)
24233 && INTVAL (expected_align_exp) > align)
24234 align = INTVAL (expected_align_exp);
24235 /* ALIGN is the minimum of destination and source alignment, but we care here
24236 just about destination alignment. */
24237 else if (!issetmem
24238 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24239 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24241 if (CONST_INT_P (count_exp))
24243 min_size = max_size = probable_max_size = count = expected_size
24244 = INTVAL (count_exp);
24245 /* When COUNT is 0, there is nothing to do. */
24246 if (!count)
24247 return true;
24249 else
24251 if (min_size_exp)
24252 min_size = INTVAL (min_size_exp);
24253 if (max_size_exp)
24254 max_size = INTVAL (max_size_exp);
24255 if (probable_max_size_exp)
24256 probable_max_size = INTVAL (probable_max_size_exp);
24257 if (CONST_INT_P (expected_size_exp))
24258 expected_size = INTVAL (expected_size_exp);
24261 /* Make sure we don't need to care about overflow later on. */
24262 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24263 return false;
24265 /* Step 0: Decide on preferred algorithm, desired alignment and
24266 size of chunks to be copied by main loop. */
24267 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24268 issetmem,
24269 issetmem && val_exp == const0_rtx,
24270 &dynamic_check, &noalign);
24271 if (alg == libcall)
24272 return false;
24273 gcc_assert (alg != no_stringop);
24275 /* For now the vector version of memset is generated only for memory zeroing,
24276 as creating a promoted vector value is very cheap in this case. */
24277 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24278 alg = unrolled_loop;
24280 if (!count)
24281 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24282 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24283 if (!issetmem)
24284 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24286 unroll_factor = 1;
24287 move_mode = word_mode;
24288 switch (alg)
24290 case libcall:
24291 case no_stringop:
24292 case last_alg:
24293 gcc_unreachable ();
24294 case loop_1_byte:
24295 need_zero_guard = true;
24296 move_mode = QImode;
24297 break;
24298 case loop:
24299 need_zero_guard = true;
24300 break;
24301 case unrolled_loop:
24302 need_zero_guard = true;
24303 unroll_factor = (TARGET_64BIT ? 4 : 2);
24304 break;
24305 case vector_loop:
24306 need_zero_guard = true;
24307 unroll_factor = 4;
24308 /* Find the widest supported mode. */
24309 move_mode = word_mode;
24310 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24311 != CODE_FOR_nothing)
24312 move_mode = GET_MODE_WIDER_MODE (move_mode);
24314 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24315 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24316 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24318 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24319 move_mode = mode_for_vector (word_mode, nunits);
24320 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24321 move_mode = word_mode;
24323 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24324 break;
24325 case rep_prefix_8_byte:
24326 move_mode = DImode;
24327 break;
24328 case rep_prefix_4_byte:
24329 move_mode = SImode;
24330 break;
24331 case rep_prefix_1_byte:
24332 move_mode = QImode;
24333 break;
24335 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24336 epilogue_size_needed = size_needed;
24338 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24339 if (!TARGET_ALIGN_STRINGOPS || noalign)
24340 align = desired_align;
24342 /* Step 1: Prologue guard. */
24344 /* Alignment code needs count to be in register. */
24345 if (CONST_INT_P (count_exp) && desired_align > align)
24347 if (INTVAL (count_exp) > desired_align
24348 && INTVAL (count_exp) > size_needed)
24350 align_bytes
24351 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24352 if (align_bytes <= 0)
24353 align_bytes = 0;
24354 else
24355 align_bytes = desired_align - align_bytes;
24357 if (align_bytes == 0)
24358 count_exp = force_reg (counter_mode (count_exp), count_exp);
24360 gcc_assert (desired_align >= 1 && align >= 1);
24362 /* Misaligned move sequences handle both prologue and epilogue at once.
24363 Default code generation results in smaller code for large alignments
24364 and also avoids redundant work when sizes are known precisely. */
24365 misaligned_prologue_used
24366 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24367 && MAX (desired_align, epilogue_size_needed) <= 32
24368 && desired_align <= epilogue_size_needed
24369 && ((desired_align > align && !align_bytes)
24370 || (!count && epilogue_size_needed > 1)));
24372 /* Do the cheap promotion to allow better CSE across the
24373 main loop and epilogue (i.e. one load of the big constant in
24374 front of all the code).
24375 For now the misaligned move sequences do not have a fast path
24376 without broadcasting. */
24377 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24379 if (alg == vector_loop)
24381 gcc_assert (val_exp == const0_rtx);
24382 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24383 promoted_val = promote_duplicated_reg_to_size (val_exp,
24384 GET_MODE_SIZE (word_mode),
24385 desired_align, align);
24387 else
24389 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24390 desired_align, align);
24393 /* Misaligned move sequences handle both prologues and epilogues at once.
24394 Default code generation results in smaller code for large alignments and
24395 also avoids redundant work when sizes are known precisely. */
24396 if (misaligned_prologue_used)
24398 /* The misaligned move prologue handles small blocks by itself. */
24399 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24400 (dst, src, &destreg, &srcreg,
24401 move_mode, promoted_val, vec_promoted_val,
24402 &count_exp,
24403 &jump_around_label,
24404 desired_align < align
24405 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24406 desired_align, align, &min_size, dynamic_check, issetmem);
24407 if (!issetmem)
24408 src = change_address (src, BLKmode, srcreg);
24409 dst = change_address (dst, BLKmode, destreg);
24410 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24411 epilogue_size_needed = 0;
24412 if (need_zero_guard && !min_size)
24414 /* It is possible that we copied enough so the main loop will not
24415 execute. */
24416 gcc_assert (size_needed > 1);
24417 if (jump_around_label == NULL_RTX)
24418 jump_around_label = gen_label_rtx ();
24419 emit_cmp_and_jump_insns (count_exp,
24420 GEN_INT (size_needed),
24421 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24422 if (expected_size == -1
24423 || expected_size < (desired_align - align) / 2 + size_needed)
24424 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24425 else
24426 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24429 /* Ensure that alignment prologue won't copy past end of block. */
24430 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24432 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24433 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24434 Make sure it is power of 2. */
24435 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24437 /* To improve performance for small blocks, we jump around the VAL
24438 promoting code. This means that if the promoted VAL is not constant,
24439 we might not use it in the epilogue and have to use the byte
24440 loop variant. */
24441 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24442 force_loopy_epilogue = true;
24443 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24444 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24446 /* If main algorithm works on QImode, no epilogue is needed.
24447 For small sizes just don't align anything. */
24448 if (size_needed == 1)
24449 desired_align = align;
24450 else
24451 goto epilogue;
24453 else if (!count
24454 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24456 label = gen_label_rtx ();
24457 emit_cmp_and_jump_insns (count_exp,
24458 GEN_INT (epilogue_size_needed),
24459 LTU, 0, counter_mode (count_exp), 1, label);
24460 if (expected_size == -1 || expected_size < epilogue_size_needed)
24461 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24462 else
24463 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24467 /* Emit code to decide at runtime whether a library call or the inline code
24468 should be used. */
24469 if (dynamic_check != -1)
24471 if (!issetmem && CONST_INT_P (count_exp))
24473 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24475 emit_block_move_via_libcall (dst, src, count_exp, false);
24476 count_exp = const0_rtx;
24477 goto epilogue;
24480 else
24482 rtx hot_label = gen_label_rtx ();
24483 if (jump_around_label == NULL_RTX)
24484 jump_around_label = gen_label_rtx ();
24485 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24486 LEU, 0, counter_mode (count_exp),
24487 1, hot_label);
24488 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24489 if (issetmem)
24490 set_storage_via_libcall (dst, count_exp, val_exp, false);
24491 else
24492 emit_block_move_via_libcall (dst, src, count_exp, false);
24493 emit_jump (jump_around_label);
24494 emit_label (hot_label);
24498 /* Step 2: Alignment prologue. */
24499 /* Do the expensive promotion once we branched off the small blocks. */
24500 if (issetmem && !promoted_val)
24501 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24502 desired_align, align);
24504 if (desired_align > align && !misaligned_prologue_used)
24506 if (align_bytes == 0)
24508 /* Except for the first move in the prologue, we no longer know
24509 the constant offset in aliasing info. It does not seem worth
24510 the pain to maintain it for the first move, so throw away
24511 the info early. */
24512 dst = change_address (dst, BLKmode, destreg);
24513 if (!issetmem)
24514 src = change_address (src, BLKmode, srcreg);
24515 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24516 promoted_val, vec_promoted_val,
24517 count_exp, align, desired_align,
24518 issetmem);
24519 /* At most desired_align - align bytes are copied. */
24520 if (min_size < (unsigned)(desired_align - align))
24521 min_size = 0;
24522 else
24523 min_size -= desired_align - align;
24525 else
24527 /* If we know how many bytes need to be stored before dst is
24528 sufficiently aligned, maintain aliasing info accurately. */
24529 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24530 srcreg,
24531 promoted_val,
24532 vec_promoted_val,
24533 desired_align,
24534 align_bytes,
24535 issetmem);
24537 count_exp = plus_constant (counter_mode (count_exp),
24538 count_exp, -align_bytes);
24539 count -= align_bytes;
24540 min_size -= align_bytes;
24541 max_size -= align_bytes;
24543 if (need_zero_guard
24544 && !min_size
24545 && (count < (unsigned HOST_WIDE_INT) size_needed
24546 || (align_bytes == 0
24547 && count < ((unsigned HOST_WIDE_INT) size_needed
24548 + desired_align - align))))
24550 /* It is possible that we copied enough so the main loop will not
24551 execute. */
24552 gcc_assert (size_needed > 1);
24553 if (label == NULL_RTX)
24554 label = gen_label_rtx ();
24555 emit_cmp_and_jump_insns (count_exp,
24556 GEN_INT (size_needed),
24557 LTU, 0, counter_mode (count_exp), 1, label);
24558 if (expected_size == -1
24559 || expected_size < (desired_align - align) / 2 + size_needed)
24560 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24561 else
24562 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24565 if (label && size_needed == 1)
24567 emit_label (label);
24568 LABEL_NUSES (label) = 1;
24569 label = NULL;
24570 epilogue_size_needed = 1;
24571 if (issetmem)
24572 promoted_val = val_exp;
24574 else if (label == NULL_RTX && !misaligned_prologue_used)
24575 epilogue_size_needed = size_needed;
24577 /* Step 3: Main loop. */
24579 switch (alg)
24581 case libcall:
24582 case no_stringop:
24583 case last_alg:
24584 gcc_unreachable ();
24585 case loop_1_byte:
24586 case loop:
24587 case unrolled_loop:
24588 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24589 count_exp, move_mode, unroll_factor,
24590 expected_size, issetmem);
24591 break;
24592 case vector_loop:
24593 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24594 vec_promoted_val, count_exp, move_mode,
24595 unroll_factor, expected_size, issetmem);
24596 break;
24597 case rep_prefix_8_byte:
24598 case rep_prefix_4_byte:
24599 case rep_prefix_1_byte:
24600 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24601 val_exp, count_exp, move_mode, issetmem);
24602 break;
24604 /* Properly adjust the offsets of the src and dest memory for aliasing. */
24605 if (CONST_INT_P (count_exp))
24607 if (!issetmem)
24608 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24609 (count / size_needed) * size_needed);
24610 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24611 (count / size_needed) * size_needed);
24613 else
24615 if (!issetmem)
24616 src = change_address (src, BLKmode, srcreg);
24617 dst = change_address (dst, BLKmode, destreg);
24620 /* Step 4: Epilogue to copy the remaining bytes. */
24621 epilogue:
24622 if (label)
24624 /* When the main loop is done, COUNT_EXP might hold original count,
24625 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24626 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24627 bytes. Compensate if needed. */
24629 if (size_needed < epilogue_size_needed)
24631 tmp =
24632 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24633 GEN_INT (size_needed - 1), count_exp, 1,
24634 OPTAB_DIRECT);
24635 if (tmp != count_exp)
24636 emit_move_insn (count_exp, tmp);
24638 emit_label (label);
24639 LABEL_NUSES (label) = 1;
24642 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24644 if (force_loopy_epilogue)
24645 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24646 epilogue_size_needed);
24647 else
24649 if (issetmem)
24650 expand_setmem_epilogue (dst, destreg, promoted_val,
24651 vec_promoted_val, count_exp,
24652 epilogue_size_needed);
24653 else
24654 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24655 epilogue_size_needed);
24658 if (jump_around_label)
24659 emit_label (jump_around_label);
24660 return true;
24664 /* Expand the appropriate insns for doing strlen if not just doing
24665 repnz; scasb
24667 out = result, initialized with the start address
24668 align_rtx = alignment of the address.
24669 scratch = scratch register, initialized with the start address when
24670 not aligned, otherwise undefined
24672 This is just the body. It needs the initializations mentioned above and
24673 some address computing at the end. These things are done in i386.md. */
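/* Conceptually the generated code behaves like this C sketch (illustrative
   only; the real expansion works on RTL and dispatches on the known
   alignment up front):

     const char *p = out;
     while ((uintptr_t) p & 3)                      // byte loop until aligned
       { if (*p == 0) goto done; p++; }
     for (;;)                                       // then 4 bytes per iteration
       {
	 unsigned w = *(const unsigned *) p; p += 4;
	 if ((w - 0x01010101U) & ~w & 0x80808080U)  // some byte is zero
	   break;
       }
     // back up from p to the exact zero byte; at "done", out holds the
     // address of the terminating zero.
 */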
24675 static void
24676 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24678 int align;
24679 rtx tmp;
24680 rtx align_2_label = NULL_RTX;
24681 rtx align_3_label = NULL_RTX;
24682 rtx align_4_label = gen_label_rtx ();
24683 rtx end_0_label = gen_label_rtx ();
24684 rtx mem;
24685 rtx tmpreg = gen_reg_rtx (SImode);
24686 rtx scratch = gen_reg_rtx (SImode);
24687 rtx cmp;
24689 align = 0;
24690 if (CONST_INT_P (align_rtx))
24691 align = INTVAL (align_rtx);
24693 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24695 /* Is there a known alignment and is it less than 4? */
24696 if (align < 4)
24698 rtx scratch1 = gen_reg_rtx (Pmode);
24699 emit_move_insn (scratch1, out);
24700 /* Is there a known alignment and is it not 2? */
24701 if (align != 2)
24703 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24704 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24706 /* Leave just the 3 lower bits. */
24707 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24708 NULL_RTX, 0, OPTAB_WIDEN);
24710 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24711 Pmode, 1, align_4_label);
24712 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24713 Pmode, 1, align_2_label);
24714 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24715 Pmode, 1, align_3_label);
24717 else
24719 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24720 check if it is aligned to a 4-byte boundary. */
24722 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24723 NULL_RTX, 0, OPTAB_WIDEN);
24725 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24726 Pmode, 1, align_4_label);
24729 mem = change_address (src, QImode, out);
24731 /* Now compare the bytes. */
24733 /* Compare the first n unaligned byte on a byte per byte basis. */
24734 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24735 QImode, 1, end_0_label);
24737 /* Increment the address. */
24738 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24740 /* Not needed with an alignment of 2 */
24741 if (align != 2)
24743 emit_label (align_2_label);
24745 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24746 end_0_label);
24748 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24750 emit_label (align_3_label);
24753 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24754 end_0_label);
24756 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24759 /* Generate loop to check 4 bytes at a time. It is not a good idea to
24760 align this loop; it only makes the program bigger and does not help
24761 to speed it up. */
24762 emit_label (align_4_label);
24764 mem = change_address (src, SImode, out);
24765 emit_move_insn (scratch, mem);
24766 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24768 /* This formula yields a nonzero result iff one of the bytes is zero.
24769 This saves three branches inside the loop and many cycles. */
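/* For example, with scratch == 0x12003456 the sequence below computes
   (0x12003456 - 0x01010101) & ~0x12003456 & 0x80808080
   == 0x10ff3355 & 0xedffcba9 & 0x80808080 == 0x00800000,
   nonzero precisely because the second-highest byte is zero; a word with
   no zero byte always yields zero here. */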
24771 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24772 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24773 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24774 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24775 gen_int_mode (0x80808080, SImode)));
24776 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24777 align_4_label);
24779 if (TARGET_CMOVE)
24781 rtx reg = gen_reg_rtx (SImode);
24782 rtx reg2 = gen_reg_rtx (Pmode);
24783 emit_move_insn (reg, tmpreg);
24784 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24786 /* If zero is not in the first two bytes, move two bytes forward. */
24787 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24788 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24789 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24790 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24791 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24792 reg,
24793 tmpreg)));
24794 /* Emit lea manually to avoid clobbering of flags. */
24795 emit_insn (gen_rtx_SET (SImode, reg2,
24796 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24798 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24799 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24800 emit_insn (gen_rtx_SET (VOIDmode, out,
24801 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24802 reg2,
24803 out)));
24805 else
24807 rtx end_2_label = gen_label_rtx ();
24808 /* Is zero in the first two bytes? */
24810 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24811 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24812 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24813 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24814 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24815 pc_rtx);
24816 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24817 JUMP_LABEL (tmp) = end_2_label;
24819 /* Not in the first two. Move two bytes forward. */
24820 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24821 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24823 emit_label (end_2_label);
24827 /* Avoid branch in fixing the byte. */
24828 tmpreg = gen_lowpart (QImode, tmpreg);
24829 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24830 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24831 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24832 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24834 emit_label (end_0_label);
24837 /* Expand strlen. */
24839 bool
24840 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24842 rtx addr, scratch1, scratch2, scratch3, scratch4;
24844 /* The generic case of strlen expander is long. Avoid its
24845 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
24847 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24848 && !TARGET_INLINE_ALL_STRINGOPS
24849 && !optimize_insn_for_size_p ()
24850 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24851 return false;
24853 addr = force_reg (Pmode, XEXP (src, 0));
24854 scratch1 = gen_reg_rtx (Pmode);
24856 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24857 && !optimize_insn_for_size_p ())
24859 /* Well, it seems that some optimizer does not combine a call like
24860 foo(strlen(bar), strlen(bar));
24861 when the move and the subtraction are done here. It does calculate
24862 the length just once when these instructions are done inside of
24863 output_strlen_unroll(). But since &bar[strlen(bar)] is
24864 often used and this uses one fewer register for the lifetime of
24865 output_strlen_unroll(), this is better. */
24867 emit_move_insn (out, addr);
24869 ix86_expand_strlensi_unroll_1 (out, src, align);
24871 /* strlensi_unroll_1 returns the address of the zero at the end of
24872 the string, like memchr(), so compute the length by subtracting
24873 the start address. */
24874 emit_insn (ix86_gen_sub3 (out, out, addr));
24876 else
24878 rtx unspec;
24880 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24881 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24882 return false;
24884 scratch2 = gen_reg_rtx (Pmode);
24885 scratch3 = gen_reg_rtx (Pmode);
24886 scratch4 = force_reg (Pmode, constm1_rtx);
24888 emit_move_insn (scratch3, addr);
24889 eoschar = force_reg (QImode, eoschar);
24891 src = replace_equiv_address_nv (src, scratch3);
24893 /* If .md starts supporting :P, this can be done in .md. */
24894 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24895 scratch4), UNSPEC_SCAS);
24896 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24897 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24898 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24900 return true;
24903 /* For a given symbol (function), construct code to compute the address
24904 of its PLT entry in the large x86-64 PIC model. */
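/* The emitted sequence is roughly (PICREG stands for whatever register
   holds the GOT base in pic_offset_table_rtx):
     movabsq $symbol@PLTOFF, %reg
     addq %PICREG, %reg
   leaving %reg with the address of symbol's PLT entry. */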
24905 static rtx
24906 construct_plt_address (rtx symbol)
24908 rtx tmp, unspec;
24910 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24911 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24912 gcc_assert (Pmode == DImode);
24914 tmp = gen_reg_rtx (Pmode);
24915 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24917 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24918 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24919 return tmp;
24923 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24924 rtx callarg2,
24925 rtx pop, bool sibcall)
24927 unsigned int const cregs_size
24928 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24929 rtx vec[3 + cregs_size];
24930 rtx use = NULL, call;
24931 unsigned int vec_len = 0;
24933 if (pop == const0_rtx)
24934 pop = NULL;
24935 gcc_assert (!TARGET_64BIT || !pop);
24937 if (TARGET_MACHO && !TARGET_64BIT)
24939 #if TARGET_MACHO
24940 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24941 fnaddr = machopic_indirect_call_target (fnaddr);
24942 #endif
24944 else
24946 /* Static functions and indirect calls don't need the pic register. */
24947 if (flag_pic
24948 && (!TARGET_64BIT
24949 || (ix86_cmodel == CM_LARGE_PIC
24950 && DEFAULT_ABI != MS_ABI))
24951 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24952 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24953 use_reg (&use, pic_offset_table_rtx);
24956 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24958 rtx al = gen_rtx_REG (QImode, AX_REG);
24959 emit_move_insn (al, callarg2);
24960 use_reg (&use, al);
24963 if (ix86_cmodel == CM_LARGE_PIC
24964 && !TARGET_PECOFF
24965 && MEM_P (fnaddr)
24966 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24967 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24968 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24969 else if (sibcall
24970 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24971 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24973 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24974 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24977 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24978 if (retval)
24979 call = gen_rtx_SET (VOIDmode, retval, call);
24980 vec[vec_len++] = call;
24982 if (pop)
24984 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24985 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24986 vec[vec_len++] = pop;
24989 if (TARGET_64BIT_MS_ABI
24990 && (!callarg2 || INTVAL (callarg2) != -2))
24992 unsigned i;
24994 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24995 UNSPEC_MS_TO_SYSV_CALL);
24997 for (i = 0; i < cregs_size; i++)
24999 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25000 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25002 vec[vec_len++]
25003 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
25007 if (vec_len > 1)
25008 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25009 call = emit_call_insn (call);
25010 if (use)
25011 CALL_INSN_FUNCTION_USAGE (call) = use;
25013 return call;
25016 /* Output the assembly for a call instruction. */
25018 const char *
25019 ix86_output_call_insn (rtx insn, rtx call_op)
25021 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25022 bool seh_nop_p = false;
25023 const char *xasm;
25025 if (SIBLING_CALL_P (insn))
25027 if (direct_p)
25028 xasm = "jmp\t%P0";
25029 /* SEH epilogue detection requires the indirect branch case
25030 to include REX.W. */
25031 else if (TARGET_SEH)
25032 xasm = "rex.W jmp %A0";
25033 else
25034 xasm = "jmp\t%A0";
25036 output_asm_insn (xasm, &call_op);
25037 return "";
25040 /* SEH unwinding can require an extra nop to be emitted in several
25041 circumstances. Determine if we have one of those. */
25042 if (TARGET_SEH)
25044 rtx i;
25046 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25048 /* If we get to another real insn, we don't need the nop. */
25049 if (INSN_P (i))
25050 break;
25052 /* If we get to the epilogue note, prevent a catch region from
25053 being adjacent to the standard epilogue sequence. With non-call
25054 exceptions, we'll have done this during epilogue emission. */
25055 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25056 && !flag_non_call_exceptions
25057 && !can_throw_internal (insn))
25059 seh_nop_p = true;
25060 break;
25064 /* If we didn't find a real insn following the call, prevent the
25065 unwinder from looking into the next function. */
25066 if (i == NULL)
25067 seh_nop_p = true;
25070 if (direct_p)
25071 xasm = "call\t%P0";
25072 else
25073 xasm = "call\t%A0";
25075 output_asm_insn (xasm, &call_op);
25077 if (seh_nop_p)
25078 return "nop";
25080 return "";
25083 /* Clear stack slot assignments remembered from previous functions.
25084 This is called from INIT_EXPANDERS once before RTL is emitted for each
25085 function. */
25087 static struct machine_function *
25088 ix86_init_machine_status (void)
25090 struct machine_function *f;
25092 f = ggc_alloc_cleared_machine_function ();
25093 f->use_fast_prologue_epilogue_nregs = -1;
25094 f->call_abi = ix86_abi;
25096 return f;
25099 /* Return a MEM corresponding to a stack slot with mode MODE.
25100 Allocate a new slot if necessary.
25102 The RTL for a function can have several slots available: N is
25103 which slot to use. */
25106 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25108 struct stack_local_entry *s;
25110 gcc_assert (n < MAX_386_STACK_LOCALS);
25112 for (s = ix86_stack_locals; s; s = s->next)
25113 if (s->mode == mode && s->n == n)
25114 return validize_mem (copy_rtx (s->rtl));
25116 s = ggc_alloc_stack_local_entry ();
25117 s->n = n;
25118 s->mode = mode;
25119 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25121 s->next = ix86_stack_locals;
25122 ix86_stack_locals = s;
25123 return validize_mem (s->rtl);
25126 static void
25127 ix86_instantiate_decls (void)
25129 struct stack_local_entry *s;
25131 for (s = ix86_stack_locals; s; s = s->next)
25132 if (s->rtl != NULL_RTX)
25133 instantiate_decl_rtl (s->rtl);
25136 /* Check whether x86 address PARTS is a pc-relative address. */
25138 static bool
25139 rip_relative_addr_p (struct ix86_address *parts)
25141 rtx base, index, disp;
25143 base = parts->base;
25144 index = parts->index;
25145 disp = parts->disp;
25147 if (disp && !base && !index)
25149 if (TARGET_64BIT)
25151 rtx symbol = disp;
25153 if (GET_CODE (disp) == CONST)
25154 symbol = XEXP (disp, 0);
25155 if (GET_CODE (symbol) == PLUS
25156 && CONST_INT_P (XEXP (symbol, 1)))
25157 symbol = XEXP (symbol, 0);
25159 if (GET_CODE (symbol) == LABEL_REF
25160 || (GET_CODE (symbol) == SYMBOL_REF
25161 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25162 || (GET_CODE (symbol) == UNSPEC
25163 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25164 || XINT (symbol, 1) == UNSPEC_PCREL
25165 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25166 return true;
25169 return false;
25172 /* Calculate the length of the memory address in the instruction encoding.
25173 Includes the addr32 prefix, but does not include the one-byte modrm, opcode,
25174 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
25177 memory_address_length (rtx addr, bool lea)
25179 struct ix86_address parts;
25180 rtx base, index, disp;
25181 int len;
25182 int ok;
25184 if (GET_CODE (addr) == PRE_DEC
25185 || GET_CODE (addr) == POST_INC
25186 || GET_CODE (addr) == PRE_MODIFY
25187 || GET_CODE (addr) == POST_MODIFY)
25188 return 0;
25190 ok = ix86_decompose_address (addr, &parts);
25191 gcc_assert (ok);
25193 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25195 /* If this is not LEA instruction, add the length of addr32 prefix. */
25196 if (TARGET_64BIT && !lea
25197 && (SImode_address_operand (addr, VOIDmode)
25198 || (parts.base && GET_MODE (parts.base) == SImode)
25199 || (parts.index && GET_MODE (parts.index) == SImode)))
25200 len++;
25202 base = parts.base;
25203 index = parts.index;
25204 disp = parts.disp;
25206 if (base && GET_CODE (base) == SUBREG)
25207 base = SUBREG_REG (base);
25208 if (index && GET_CODE (index) == SUBREG)
25209 index = SUBREG_REG (index);
25211 gcc_assert (base == NULL_RTX || REG_P (base));
25212 gcc_assert (index == NULL_RTX || REG_P (index));
25214 /* Rule of thumb:
25215 - esp as the base always wants an index,
25216 - ebp as the base always wants a displacement,
25217 - r12 as the base always wants an index,
25218 - r13 as the base always wants a displacement. */
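/* A couple of concrete encodings as illustration:
     movl (%esp), %eax -> needs a SIB byte (esp as base implies an index),
     movl (%ebp), %eax -> needs a zero disp8 (mod 00 with ebp means disp32),
   which is why the register-indirect case below adds one byte for these
   bases (and for r12/r13 in 64-bit code). */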
25220 /* Register Indirect. */
25221 if (base && !index && !disp)
25223 /* esp (for its index) and ebp (for its displacement) need
25224 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25225 code. */
25226 if (base == arg_pointer_rtx
25227 || base == frame_pointer_rtx
25228 || REGNO (base) == SP_REG
25229 || REGNO (base) == BP_REG
25230 || REGNO (base) == R12_REG
25231 || REGNO (base) == R13_REG)
25232 len++;
25235 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25236 is not disp32, but disp32(%rip), so for disp32
25237 SIB byte is needed, unless print_operand_address
25238 optimizes it into disp32(%rip) or (%rip) is implied
25239 by UNSPEC. */
25240 else if (disp && !base && !index)
25242 len += 4;
25243 if (rip_relative_addr_p (&parts))
25244 len++;
25246 else
25248 /* Find the length of the displacement constant. */
25249 if (disp)
25251 if (base && satisfies_constraint_K (disp))
25252 len += 1;
25253 else
25254 len += 4;
25256 /* ebp always wants a displacement. Similarly r13. */
25257 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25258 len++;
25260 /* An index requires the two-byte modrm form.... */
25261 if (index
25262 /* ...like esp (or r12), which always wants an index. */
25263 || base == arg_pointer_rtx
25264 || base == frame_pointer_rtx
25265 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25266 len++;
25269 return len;
25272 /* Compute the default value for the "length_immediate" attribute. When
25273 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
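/* For example, "addl $5, %eax" has an imm8 alternative (opcode 0x83) and
   counts as length 1 when SHORTFORM is set, while "addl $300, %eax" needs
   a full 32-bit immediate and counts as length 4. */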
25275 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25277 int len = 0;
25278 int i;
25279 extract_insn_cached (insn);
25280 for (i = recog_data.n_operands - 1; i >= 0; --i)
25281 if (CONSTANT_P (recog_data.operand[i]))
25283 enum attr_mode mode = get_attr_mode (insn);
25285 gcc_assert (!len);
25286 if (shortform && CONST_INT_P (recog_data.operand[i]))
25288 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25289 switch (mode)
25291 case MODE_QI:
25292 len = 1;
25293 continue;
25294 case MODE_HI:
25295 ival = trunc_int_for_mode (ival, HImode);
25296 break;
25297 case MODE_SI:
25298 ival = trunc_int_for_mode (ival, SImode);
25299 break;
25300 default:
25301 break;
25303 if (IN_RANGE (ival, -128, 127))
25305 len = 1;
25306 continue;
25309 switch (mode)
25311 case MODE_QI:
25312 len = 1;
25313 break;
25314 case MODE_HI:
25315 len = 2;
25316 break;
25317 case MODE_SI:
25318 len = 4;
25319 break;
25320 /* Immediates for DImode instructions are encoded
25321 as 32-bit sign-extended values. */
25322 case MODE_DI:
25323 len = 4;
25324 break;
25325 default:
25326 fatal_insn ("unknown insn mode", insn);
25329 return len;
25332 /* Compute default value for "length_address" attribute. */
25334 ix86_attr_length_address_default (rtx insn)
25336 int i;
25338 if (get_attr_type (insn) == TYPE_LEA)
25340 rtx set = PATTERN (insn), addr;
25342 if (GET_CODE (set) == PARALLEL)
25343 set = XVECEXP (set, 0, 0);
25345 gcc_assert (GET_CODE (set) == SET);
25347 addr = SET_SRC (set);
25349 return memory_address_length (addr, true);
25352 extract_insn_cached (insn);
25353 for (i = recog_data.n_operands - 1; i >= 0; --i)
25354 if (MEM_P (recog_data.operand[i]))
25356 constrain_operands_cached (reload_completed);
25357 if (which_alternative != -1)
25359 const char *constraints = recog_data.constraints[i];
25360 int alt = which_alternative;
25362 while (*constraints == '=' || *constraints == '+')
25363 constraints++;
25364 while (alt-- > 0)
25365 while (*constraints++ != ',')
25367 /* Skip ignored operands. */
25368 if (*constraints == 'X')
25369 continue;
25371 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25373 return 0;
25376 /* Compute default value for "length_vex" attribute. It includes
25377 2 or 3 byte VEX prefix and 1 opcode byte. */
25380 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25382 int i;
25384 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX W bit
25385 requires the 3-byte VEX prefix. */
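/* E.g. "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte (0xC5) VEX form,
   giving 2 + 1 = 3 bytes for prefix plus opcode, whereas VEX.W, the
   0x0F 0x38/0x3A opcode maps, or REX.X/REX.B register extensions force
   the 3-byte (0xC4) form, giving 3 + 1 = 4. */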
25386 if (!has_0f_opcode || has_vex_w)
25387 return 3 + 1;
25389 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25390 if (!TARGET_64BIT)
25391 return 2 + 1;
25393 extract_insn_cached (insn);
25395 for (i = recog_data.n_operands - 1; i >= 0; --i)
25396 if (REG_P (recog_data.operand[i]))
25398 /* REX.W bit uses 3 byte VEX prefix. */
25399 if (GET_MODE (recog_data.operand[i]) == DImode
25400 && GENERAL_REG_P (recog_data.operand[i]))
25401 return 3 + 1;
25403 else
25405 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25406 if (MEM_P (recog_data.operand[i])
25407 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25408 return 3 + 1;
25411 return 2 + 1;
25414 /* Return the maximum number of instructions a cpu can issue. */
25416 static int
25417 ix86_issue_rate (void)
25419 switch (ix86_tune)
25421 case PROCESSOR_PENTIUM:
25422 case PROCESSOR_BONNELL:
25423 case PROCESSOR_SILVERMONT:
25424 case PROCESSOR_INTEL:
25425 case PROCESSOR_K6:
25426 case PROCESSOR_BTVER2:
25427 case PROCESSOR_PENTIUM4:
25428 case PROCESSOR_NOCONA:
25429 return 2;
25431 case PROCESSOR_PENTIUMPRO:
25432 case PROCESSOR_ATHLON:
25433 case PROCESSOR_K8:
25434 case PROCESSOR_AMDFAM10:
25435 case PROCESSOR_GENERIC:
25436 case PROCESSOR_BTVER1:
25437 return 3;
25439 case PROCESSOR_BDVER1:
25440 case PROCESSOR_BDVER2:
25441 case PROCESSOR_BDVER3:
25442 case PROCESSOR_BDVER4:
25443 case PROCESSOR_CORE2:
25444 case PROCESSOR_NEHALEM:
25445 case PROCESSOR_SANDYBRIDGE:
25446 case PROCESSOR_HASWELL:
25447 return 4;
25449 default:
25450 return 1;
25454 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25455 set by DEP_INSN and nothing else set by DEP_INSN. */
25457 static bool
25458 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25460 rtx set, set2;
25462 /* Simplify the test for uninteresting insns. */
25463 if (insn_type != TYPE_SETCC
25464 && insn_type != TYPE_ICMOV
25465 && insn_type != TYPE_FCMOV
25466 && insn_type != TYPE_IBR)
25467 return false;
25469 if ((set = single_set (dep_insn)) != 0)
25471 set = SET_DEST (set);
25472 set2 = NULL_RTX;
25474 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25475 && XVECLEN (PATTERN (dep_insn), 0) == 2
25476 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25477 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25479 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25480 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25482 else
25483 return false;
25485 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25486 return false;
25488 /* This test is true if the dependent insn reads the flags but
25489 not any other potentially set register. */
25490 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25491 return false;
25493 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25494 return false;
25496 return true;
25499 /* Return true iff USE_INSN has a memory address with operands set by
25500 SET_INSN. */
25502 bool
25503 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25505 int i;
25506 extract_insn_cached (use_insn);
25507 for (i = recog_data.n_operands - 1; i >= 0; --i)
25508 if (MEM_P (recog_data.operand[i]))
25510 rtx addr = XEXP (recog_data.operand[i], 0);
25511 return modified_in_p (addr, set_insn) != 0;
25513 return false;
25516 /* Helper function for exact_store_load_dependency.
25517 Return true if addr is found in insn. */
25518 static bool
25519 exact_dependency_1 (rtx addr, rtx insn)
25521 enum rtx_code code;
25522 const char *format_ptr;
25523 int i, j;
25525 code = GET_CODE (insn);
25526 switch (code)
25528 case MEM:
25529 if (rtx_equal_p (addr, insn))
25530 return true;
25531 break;
25532 case REG:
25533 CASE_CONST_ANY:
25534 case SYMBOL_REF:
25535 case CODE_LABEL:
25536 case PC:
25537 case CC0:
25538 case EXPR_LIST:
25539 return false;
25540 default:
25541 break;
25544 format_ptr = GET_RTX_FORMAT (code);
25545 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25547 switch (*format_ptr++)
25549 case 'e':
25550 if (exact_dependency_1 (addr, XEXP (insn, i)))
25551 return true;
25552 break;
25553 case 'E':
25554 for (j = 0; j < XVECLEN (insn, i); j++)
25555 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25556 return true;
25557 break;
25560 return false;
25563 /* Return true if there exists exact dependency for store & load, i.e.
25564 the same memory address is used in them. */
25565 static bool
25566 exact_store_load_dependency (rtx store, rtx load)
25568 rtx set1, set2;
25570 set1 = single_set (store);
25571 if (!set1)
25572 return false;
25573 if (!MEM_P (SET_DEST (set1)))
25574 return false;
25575 set2 = single_set (load);
25576 if (!set2)
25577 return false;
25578 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25579 return true;
25580 return false;
25583 static int
25584 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25586 enum attr_type insn_type, dep_insn_type;
25587 enum attr_memory memory;
25588 rtx set, set2;
25589 int dep_insn_code_number;
25591 /* Anti and output dependencies have zero cost on all CPUs. */
25592 if (REG_NOTE_KIND (link) != 0)
25593 return 0;
25595 dep_insn_code_number = recog_memoized (dep_insn);
25597 /* If we can't recognize the insns, we can't really do anything. */
25598 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25599 return cost;
25601 insn_type = get_attr_type (insn);
25602 dep_insn_type = get_attr_type (dep_insn);
25604 switch (ix86_tune)
25606 case PROCESSOR_PENTIUM:
25607 /* Address Generation Interlock adds a cycle of latency. */
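/* E.g. "addl $4, %ebx" immediately followed by "movl (%ebx), %eax"
   pays one extra cycle on Pentium, because the load's address is
   generated from a register written only in the previous cycle. */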
25608 if (insn_type == TYPE_LEA)
25610 rtx addr = PATTERN (insn);
25612 if (GET_CODE (addr) == PARALLEL)
25613 addr = XVECEXP (addr, 0, 0);
25615 gcc_assert (GET_CODE (addr) == SET);
25617 addr = SET_SRC (addr);
25618 if (modified_in_p (addr, dep_insn))
25619 cost += 1;
25621 else if (ix86_agi_dependent (dep_insn, insn))
25622 cost += 1;
25624 /* ??? Compares pair with jump/setcc. */
25625 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25626 cost = 0;
25628 /* Floating point stores require value to be ready one cycle earlier. */
25629 if (insn_type == TYPE_FMOV
25630 && get_attr_memory (insn) == MEMORY_STORE
25631 && !ix86_agi_dependent (dep_insn, insn))
25632 cost += 1;
25633 break;
25635 case PROCESSOR_PENTIUMPRO:
25636 /* INT->FP conversion is expensive. */
25637 if (get_attr_fp_int_src (dep_insn))
25638 cost += 5;
25640 /* There is one cycle extra latency between an FP op and a store. */
25641 if (insn_type == TYPE_FMOV
25642 && (set = single_set (dep_insn)) != NULL_RTX
25643 && (set2 = single_set (insn)) != NULL_RTX
25644 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25645 && MEM_P (SET_DEST (set2)))
25646 cost += 1;
25648 memory = get_attr_memory (insn);
25650 /* Show the ability of the reorder buffer to hide the latency of a load
25651 by executing it in parallel with the previous instruction when the
25652 previous instruction is not needed to compute the address. */
25653 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25654 && !ix86_agi_dependent (dep_insn, insn))
25656 /* Claim moves to take one cycle, as the core can issue one load
25657 at a time and the next load can start a cycle later. */
25658 if (dep_insn_type == TYPE_IMOV
25659 || dep_insn_type == TYPE_FMOV)
25660 cost = 1;
25661 else if (cost > 1)
25662 cost--;
25664 break;
25666 case PROCESSOR_K6:
25667 /* The esp dependency is resolved before
25668 the instruction is really finished. */
25669 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25670 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25671 return 1;
25673 /* INT->FP conversion is expensive. */
25674 if (get_attr_fp_int_src (dep_insn))
25675 cost += 5;
25677 memory = get_attr_memory (insn);
25679 /* Show the ability of the reorder buffer to hide the latency of a load
25680 by executing it in parallel with the previous instruction when the
25681 previous instruction is not needed to compute the address. */
25682 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25683 && !ix86_agi_dependent (dep_insn, insn))
25685 /* Claim moves to take one cycle, as the core can issue one load
25686 at a time and the next load can start a cycle later. */
25687 if (dep_insn_type == TYPE_IMOV
25688 || dep_insn_type == TYPE_FMOV)
25689 cost = 1;
25690 else if (cost > 2)
25691 cost -= 2;
25692 else
25693 cost = 1;
25695 break;
25697 case PROCESSOR_AMDFAM10:
25698 case PROCESSOR_BDVER1:
25699 case PROCESSOR_BDVER2:
25700 case PROCESSOR_BDVER3:
25701 case PROCESSOR_BDVER4:
25702 case PROCESSOR_BTVER1:
25703 case PROCESSOR_BTVER2:
25704 case PROCESSOR_GENERIC:
25705 /* The stack engine allows push&pop instructions to execute in parallel. */
25706 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25707 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25708 return 0;
25709 /* FALLTHRU */
25711 case PROCESSOR_ATHLON:
25712 case PROCESSOR_K8:
25713 memory = get_attr_memory (insn);
25715 /* Show the ability of the reorder buffer to hide the latency of a load
25716 by executing it in parallel with the previous instruction when the
25717 previous instruction is not needed to compute the address. */
25718 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25719 && !ix86_agi_dependent (dep_insn, insn))
25721 enum attr_unit unit = get_attr_unit (insn);
25722 int loadcost = 3;
25724 /* Because of the difference between the length of the integer and
25725 floating unit pipeline preparation stages, the memory operands
25726 for floating point are cheaper.
25728 ??? For Athlon the difference is most probably 2. */
25729 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25730 loadcost = 3;
25731 else
25732 loadcost = TARGET_ATHLON ? 2 : 0;
25734 if (cost >= loadcost)
25735 cost -= loadcost;
25736 else
25737 cost = 0;
25739 break;
25741 case PROCESSOR_CORE2:
25742 case PROCESSOR_NEHALEM:
25743 case PROCESSOR_SANDYBRIDGE:
25744 case PROCESSOR_HASWELL:
25745 /* The stack engine allows push&pop instructions to execute in parallel. */
25746 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25747 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25748 return 0;
25750 memory = get_attr_memory (insn);
25752 /* Show the ability of the reorder buffer to hide the latency of a load
25753 by executing it in parallel with the previous instruction when the
25754 previous instruction is not needed to compute the address. */
25755 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25756 && !ix86_agi_dependent (dep_insn, insn))
25758 if (cost >= 4)
25759 cost -= 4;
25760 else
25761 cost = 0;
25763 break;
25765 case PROCESSOR_SILVERMONT:
25766 case PROCESSOR_INTEL:
25767 if (!reload_completed)
25768 return cost;
25770 /* Increase cost of integer loads. */
25771 memory = get_attr_memory (dep_insn);
25772 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25774 enum attr_unit unit = get_attr_unit (dep_insn);
25775 if (unit == UNIT_INTEGER && cost == 1)
25777 if (memory == MEMORY_LOAD)
25778 cost = 3;
25779 else
25781 /* Increase the cost of ld/st for short int types only
25782 because of the store-forwarding issue. */
25783 rtx set = single_set (dep_insn);
25784 if (set && (GET_MODE (SET_DEST (set)) == QImode
25785 || GET_MODE (SET_DEST (set)) == HImode))
25787 /* Increase cost of store/load insn if exact
25788 dependence exists and it is load insn. */
25789 enum attr_memory insn_memory = get_attr_memory (insn);
25790 if (insn_memory == MEMORY_LOAD
25791 && exact_store_load_dependency (dep_insn, insn))
25792 cost = 3;
25798 default:
25799 break;
25802 return cost;
25805 /* How many alternative schedules to try. This should be as wide as the
25806 scheduling freedom in the DFA, but no wider. Making this value too
25807 large results in extra work for the scheduler. */
25809 static int
25810 ia32_multipass_dfa_lookahead (void)
25812 switch (ix86_tune)
25814 case PROCESSOR_PENTIUM:
25815 return 2;
25817 case PROCESSOR_PENTIUMPRO:
25818 case PROCESSOR_K6:
25819 return 1;
25821 case PROCESSOR_BDVER1:
25822 case PROCESSOR_BDVER2:
25823 case PROCESSOR_BDVER3:
25824 case PROCESSOR_BDVER4:
25825 /* We use lookahead value 4 for BD both before and after reload
25826 schedules. The plan is to use value 8 for -O3. */
25827 return 4;
25829 case PROCESSOR_CORE2:
25830 case PROCESSOR_NEHALEM:
25831 case PROCESSOR_SANDYBRIDGE:
25832 case PROCESSOR_HASWELL:
25833 case PROCESSOR_BONNELL:
25834 case PROCESSOR_SILVERMONT:
25835 case PROCESSOR_INTEL:
25836 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25837 as the number of instructions that can be executed in a cycle, i.e.,
25838 issue_rate. I wonder why tuning for many CPUs does not do this. */
25839 if (reload_completed)
25840 return ix86_issue_rate ();
25841 /* Don't use lookahead for pre-reload schedule to save compile time. */
25842 return 0;
25844 default:
25845 return 0;
25849 /* Return true if target platform supports macro-fusion. */
25851 static bool
25852 ix86_macro_fusion_p ()
25854 return TARGET_FUSE_CMP_AND_BRANCH;
25857 /* Check whether the current microarchitecture supports macro fusion
25858 for the insn pair "CONDGEN + CONDJMP". Refer to
25859 "Intel Architectures Optimization Reference Manual". */
25861 static bool
25862 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25864 rtx src, dest;
25865 rtx single_set = single_set (condgen);
25866 enum rtx_code ccode;
25867 rtx compare_set = NULL_RTX, test_if, cond;
25868 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25870 if (!any_condjump_p (condjmp))
25871 return false;
25873 if (get_attr_type (condgen) != TYPE_TEST
25874 && get_attr_type (condgen) != TYPE_ICMP
25875 && get_attr_type (condgen) != TYPE_INCDEC
25876 && get_attr_type (condgen) != TYPE_ALU)
25877 return false;
25879 if (single_set == NULL_RTX
25880 && !TARGET_FUSE_ALU_AND_BRANCH)
25881 return false;
25883 if (single_set != NULL_RTX)
25884 compare_set = single_set;
25885 else
25887 int i;
25888 rtx pat = PATTERN (condgen);
25889 for (i = 0; i < XVECLEN (pat, 0); i++)
25890 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25892 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25893 if (GET_CODE (set_src) == COMPARE)
25894 compare_set = XVECEXP (pat, 0, i);
25895 else
25896 alu_set = XVECEXP (pat, 0, i);
25899 if (compare_set == NULL_RTX)
25900 return false;
25901 src = SET_SRC (compare_set);
25902 if (GET_CODE (src) != COMPARE)
25903 return false;
25905 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25906 supported. */
25907 if ((MEM_P (XEXP (src, 0))
25908 && CONST_INT_P (XEXP (src, 1)))
25909 || (MEM_P (XEXP (src, 1))
25910 && CONST_INT_P (XEXP (src, 0))))
25911 return false;
25913 /* No fusion for RIP-relative address. */
25914 if (MEM_P (XEXP (src, 0)))
25915 addr = XEXP (XEXP (src, 0), 0);
25916 else if (MEM_P (XEXP (src, 1)))
25917 addr = XEXP (XEXP (src, 1), 0);
25919 if (addr) {
25920 ix86_address parts;
25921 int ok = ix86_decompose_address (addr, &parts);
25922 gcc_assert (ok);
25924 if (rip_relative_addr_p (&parts))
25925 return false;
25928 test_if = SET_SRC (pc_set (condjmp));
25929 cond = XEXP (test_if, 0);
25930 ccode = GET_CODE (cond);
25931 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25932 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25933 && (ccode == GE
25934 || ccode == GT
25935 || ccode == LE
25936 || ccode == LT))
25937 return false;
25939 /* Return true for TYPE_TEST and TYPE_ICMP. */
25940 if (get_attr_type (condgen) == TYPE_TEST
25941 || get_attr_type (condgen) == TYPE_ICMP)
25942 return true;
25944 /* The following handles the macro-fusion case for alu + jmp. */
25945 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25946 return false;
25948 /* No fusion for alu op with memory destination operand. */
25949 dest = SET_DEST (alu_set);
25950 if (MEM_P (dest))
25951 return false;
25953 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25954 supported. */
25955 if (get_attr_type (condgen) == TYPE_INCDEC
25956 && (ccode == GEU
25957 || ccode == GTU
25958 || ccode == LEU
25959 || ccode == LTU))
25960 return false;
25962 return true;
25965 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25966 execution. It is applied if
25967 (1) an IMUL instruction is on the top of the list;
25968 (2) there is exactly one producer of an independent IMUL instruction in
25969 the ready list.
25970 Return the index of the IMUL producer if it was found and -1 otherwise. */
25971 static int
25972 do_reorder_for_imul (rtx *ready, int n_ready)
25974 rtx insn, set, insn1, insn2;
25975 sd_iterator_def sd_it;
25976 dep_t dep;
25977 int index = -1;
25978 int i;
25980 if (!TARGET_BONNELL)
25981 return index;
25983 /* Check that IMUL instruction is on the top of ready list. */
25984 insn = ready[n_ready - 1];
25985 set = single_set (insn);
25986 if (!set)
25987 return index;
25988 if (!(GET_CODE (SET_SRC (set)) == MULT
25989 && GET_MODE (SET_SRC (set)) == SImode))
25990 return index;
25992 /* Search for producer of independent IMUL instruction. */
25993 for (i = n_ready - 2; i >= 0; i--)
25995 insn = ready[i];
25996 if (!NONDEBUG_INSN_P (insn))
25997 continue;
25998 /* Skip IMUL instruction. */
25999 insn2 = PATTERN (insn);
26000 if (GET_CODE (insn2) == PARALLEL)
26001 insn2 = XVECEXP (insn2, 0, 0);
26002 if (GET_CODE (insn2) == SET
26003 && GET_CODE (SET_SRC (insn2)) == MULT
26004 && GET_MODE (SET_SRC (insn2)) == SImode)
26005 continue;
26007 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26009 rtx con;
26010 con = DEP_CON (dep);
26011 if (!NONDEBUG_INSN_P (con))
26012 continue;
26013 insn1 = PATTERN (con);
26014 if (GET_CODE (insn1) == PARALLEL)
26015 insn1 = XVECEXP (insn1, 0, 0);
26017 if (GET_CODE (insn1) == SET
26018 && GET_CODE (SET_SRC (insn1)) == MULT
26019 && GET_MODE (SET_SRC (insn1)) == SImode)
26021 sd_iterator_def sd_it1;
26022 dep_t dep1;
26023 /* Check if there is no other dependee for IMUL. */
26024 index = i;
26025 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26027 rtx pro;
26028 pro = DEP_PRO (dep1);
26029 if (!NONDEBUG_INSN_P (pro))
26030 continue;
26031 if (pro != insn)
26032 index = -1;
26034 if (index >= 0)
26035 break;
26038 if (index >= 0)
26039 break;
26041 return index;
26044 /* Try to find the best candidate at the top of the ready list if two insns
26045 have the same priority - the candidate is best if its dependees were
26046 scheduled earlier. Applied for Silvermont only.
26047 Return true if the top 2 insns must be interchanged. */
26048 static bool
26049 swap_top_of_ready_list (rtx *ready, int n_ready)
26051 rtx top = ready[n_ready - 1];
26052 rtx next = ready[n_ready - 2];
26053 rtx set;
26054 sd_iterator_def sd_it;
26055 dep_t dep;
26056 int clock1 = -1;
26057 int clock2 = -1;
26058 #define INSN_TICK(INSN) (HID (INSN)->tick)
26060 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26061 return false;
26063 if (!NONDEBUG_INSN_P (top))
26064 return false;
26065 if (!NONJUMP_INSN_P (top))
26066 return false;
26067 if (!NONDEBUG_INSN_P (next))
26068 return false;
26069 if (!NONJUMP_INSN_P (next))
26070 return false;
26071 set = single_set (top);
26072 if (!set)
26073 return false;
26074 set = single_set (next);
26075 if (!set)
26076 return false;
26078 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26080 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26081 return false;
26082 /* Determine the winner more precisely. */
26083 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26085 rtx pro;
26086 pro = DEP_PRO (dep);
26087 if (!NONDEBUG_INSN_P (pro))
26088 continue;
26089 if (INSN_TICK (pro) > clock1)
26090 clock1 = INSN_TICK (pro);
26092 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26094 rtx pro;
26095 pro = DEP_PRO (dep);
26096 if (!NONDEBUG_INSN_P (pro))
26097 continue;
26098 if (INSN_TICK (pro) > clock2)
26099 clock2 = INSN_TICK (pro);
26102 if (clock1 == clock2)
26104 /* Determine winner - load must win. */
26105 enum attr_memory memory1, memory2;
26106 memory1 = get_attr_memory (top);
26107 memory2 = get_attr_memory (next);
26108 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26109 return true;
26111 return (bool) (clock2 < clock1);
26113 return false;
26114 #undef INSN_TICK
26117 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26118 Return the issue rate. */
26119 static int
26120 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26121 int clock_var)
26123 int issue_rate = -1;
26124 int n_ready = *pn_ready;
26125 int i;
26126 rtx insn;
26127 int index = -1;
26129 /* Set up issue rate. */
26130 issue_rate = ix86_issue_rate ();
26132 /* Do reordering for BONNELL/SILVERMONT only. */
26133 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26134 return issue_rate;
26136 /* Nothing to do if ready list contains only 1 instruction. */
26137 if (n_ready <= 1)
26138 return issue_rate;
26140 /* Do reordering for the post-reload scheduler only. */
26141 if (!reload_completed)
26142 return issue_rate;
26144 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26146 if (sched_verbose > 1)
26147 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26148 INSN_UID (ready[index]));
26150 /* Put IMUL producer (ready[index]) at the top of ready list. */
26151 insn = ready[index];
26152 for (i = index; i < n_ready - 1; i++)
26153 ready[i] = ready[i + 1];
26154 ready[n_ready - 1] = insn;
26155 return issue_rate;
26157 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26159 if (sched_verbose > 1)
26160 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26161 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26162 /* Swap 2 top elements of ready list. */
26163 insn = ready[n_ready - 1];
26164 ready[n_ready - 1] = ready[n_ready - 2];
26165 ready[n_ready - 2] = insn;
26167 return issue_rate;
26170 static bool
26171 ix86_class_likely_spilled_p (reg_class_t);
26173 /* Return true if the lhs of insn is a HW function argument register and set
26174 is_spilled to true if it is a likely-spilled HW register. */
26175 static bool
26176 insn_is_function_arg (rtx insn, bool* is_spilled)
26178 rtx dst;
26180 if (!NONDEBUG_INSN_P (insn))
26181 return false;
26182 /* Call instructions are not movable; ignore them. */
26183 if (CALL_P (insn))
26184 return false;
26185 insn = PATTERN (insn);
26186 if (GET_CODE (insn) == PARALLEL)
26187 insn = XVECEXP (insn, 0, 0);
26188 if (GET_CODE (insn) != SET)
26189 return false;
26190 dst = SET_DEST (insn);
26191 if (REG_P (dst) && HARD_REGISTER_P (dst)
26192 && ix86_function_arg_regno_p (REGNO (dst)))
26194 /* Is it likely spilled HW register? */
26195 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26196 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26197 *is_spilled = true;
26198 return true;
26200 return false;
26203 /* Add output dependencies for a chain of adjacent function arguments, but
26204 only if there is a move to a likely-spilled HW register. Return the first
26205 argument if at least one dependence was added, or NULL otherwise. */
26206 static rtx
26207 add_parameter_dependencies (rtx call, rtx head)
26209 rtx insn;
26210 rtx last = call;
26211 rtx first_arg = NULL;
26212 bool is_spilled = false;
26214 head = PREV_INSN (head);
26216 /* Find the argument-passing instruction nearest to the call. */
26217 while (true)
26219 last = PREV_INSN (last);
26220 if (last == head)
26221 return NULL;
26222 if (!NONDEBUG_INSN_P (last))
26223 continue;
26224 if (insn_is_function_arg (last, &is_spilled))
26225 break;
26226 return NULL;
26229 first_arg = last;
26230 while (true)
26232 insn = PREV_INSN (last);
26233 if (!INSN_P (insn))
26234 break;
26235 if (insn == head)
26236 break;
26237 if (!NONDEBUG_INSN_P (insn))
26239 last = insn;
26240 continue;
26242 if (insn_is_function_arg (insn, &is_spilled))
26244 /* Add an output dependence between two function arguments if the chain
26245 of output arguments contains likely-spilled HW registers. */
26246 if (is_spilled)
26247 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26248 first_arg = last = insn;
26250 else
26251 break;
26253 if (!is_spilled)
26254 return NULL;
26255 return first_arg;
26258 /* Add output or anti dependency from insn to first_arg to restrict its code
26259 motion. */
26260 static void
26261 avoid_func_arg_motion (rtx first_arg, rtx insn)
26263 rtx set;
26264 rtx tmp;
26266 set = single_set (insn);
26267 if (!set)
26268 return;
26269 tmp = SET_DEST (set);
26270 if (REG_P (tmp))
26272 /* Add output dependency to the first function argument. */
26273 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26274 return;
26276 /* Add anti dependency. */
26277 add_dependence (first_arg, insn, REG_DEP_ANTI);
26280 /* Avoid cross-block motion of a function argument by adding a dependency
26281 from the first non-jump instruction in bb. */
26282 static void
26283 add_dependee_for_func_arg (rtx arg, basic_block bb)
26285 rtx insn = BB_END (bb);
26287 while (insn)
26289 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26291 rtx set = single_set (insn);
26292 if (set)
26294 avoid_func_arg_motion (arg, insn);
26295 return;
26298 if (insn == BB_HEAD (bb))
26299 return;
26300 insn = PREV_INSN (insn);
26304 /* Hook for pre-reload schedule - avoid motion of function arguments
26305 passed in likely spilled HW registers. */
26306 static void
26307 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26309 rtx insn;
26310 rtx first_arg = NULL;
26311 if (reload_completed)
26312 return;
26313 while (head != tail && DEBUG_INSN_P (head))
26314 head = NEXT_INSN (head);
26315 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26316 if (INSN_P (insn) && CALL_P (insn))
26318 first_arg = add_parameter_dependencies (insn, head);
26319 if (first_arg)
26321 /* Add a dependee for the first argument to predecessors, but only if the
26322 region contains more than one block. */
26323 basic_block bb = BLOCK_FOR_INSN (insn);
26324 int rgn = CONTAINING_RGN (bb->index);
26325 int nr_blks = RGN_NR_BLOCKS (rgn);
26326 /* Skip trivial regions and region head blocks that can have
26327 predecessors outside of region. */
26328 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26330 edge e;
26331 edge_iterator ei;
26333 /* Regions are SCCs with the exception of selective
26334 scheduling with pipelining of outer blocks enabled.
26335 So also check that immediate predecessors of a non-head
26336 block are in the same region. */
26337 FOR_EACH_EDGE (e, ei, bb->preds)
26339 /* Avoid creating loop-carried dependencies by using the
26340 topological ordering of the region. */
26341 if (rgn == CONTAINING_RGN (e->src->index)
26342 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26343 add_dependee_for_func_arg (first_arg, e->src);
26346 insn = first_arg;
26347 if (insn == head)
26348 break;
26351 else if (first_arg)
26352 avoid_func_arg_motion (first_arg, insn);
26355 /* Hook for the pre-reload scheduler - set the priority of moves from likely
26356 spilled HW registers to the maximum, to schedule them as soon as possible.
26357 These are moves from function argument registers at the top of the function
26358 entry and moves from function return value registers after a call. */
26359 static int
26360 ix86_adjust_priority (rtx insn, int priority)
26362 rtx set;
26364 if (reload_completed)
26365 return priority;
26367 if (!NONDEBUG_INSN_P (insn))
26368 return priority;
26370 set = single_set (insn);
26371 if (set)
26373 rtx tmp = SET_SRC (set);
26374 if (REG_P (tmp)
26375 && HARD_REGISTER_P (tmp)
26376 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26377 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26378 return current_sched_info->sched_max_insns_priority;
26381 return priority;
26384 /* Model the decoder of Core 2/i7.
26385 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26386 track the instruction fetch block boundaries and make sure that long
26387 (9+ bytes) instructions are assigned to D0. */
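/* E.g. a 10-byte instruction can only be handled by decoder D0 (it exceeds
   the 8-byte secondary-decoder limit below), and once 16 bytes or 6 insns
   of the current fetch block are used up, the remaining ready insns are
   masked out until the next cycle. */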
26389 /* Maximum length of an insn that can be handled by
26390 a secondary decoder unit. '8' for Core 2/i7. */
26391 static int core2i7_secondary_decoder_max_insn_size;
26393 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26394 '16' for Core 2/i7. */
26395 static int core2i7_ifetch_block_size;
26397 /* Maximum number of instructions decoder can handle per cycle.
26398 '6' for Core 2/i7. */
26399 static int core2i7_ifetch_block_max_insns;
26401 typedef struct ix86_first_cycle_multipass_data_ *
26402 ix86_first_cycle_multipass_data_t;
26403 typedef const struct ix86_first_cycle_multipass_data_ *
26404 const_ix86_first_cycle_multipass_data_t;
26406 /* A variable to store target state across calls to max_issue within
26407 one cycle. */
26408 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26409 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26411 /* Initialize DATA. */
26412 static void
26413 core2i7_first_cycle_multipass_init (void *_data)
26415 ix86_first_cycle_multipass_data_t data
26416 = (ix86_first_cycle_multipass_data_t) _data;
26418 data->ifetch_block_len = 0;
26419 data->ifetch_block_n_insns = 0;
26420 data->ready_try_change = NULL;
26421 data->ready_try_change_size = 0;
26424 /* Advancing the cycle; reset ifetch block counts. */
26425 static void
26426 core2i7_dfa_post_advance_cycle (void)
26428 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26430 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26432 data->ifetch_block_len = 0;
26433 data->ifetch_block_n_insns = 0;
26436 static int min_insn_size (rtx);
26438 /* Filter out insns from ready_try that the core will not be able to issue
26439 on current cycle due to decoder. */
26440 static void
26441 core2i7_first_cycle_multipass_filter_ready_try
26442 (const_ix86_first_cycle_multipass_data_t data,
26443 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26445 while (n_ready--)
26447 rtx insn;
26448 int insn_size;
26450 if (ready_try[n_ready])
26451 continue;
26453 insn = get_ready_element (n_ready);
26454 insn_size = min_insn_size (insn);
26456 if (/* If this is too long an insn for a secondary decoder ... */
26457 (!first_cycle_insn_p
26458 && insn_size > core2i7_secondary_decoder_max_insn_size)
26459 /* ... or it would not fit into the ifetch block ... */
26460 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26461 /* ... or the decoder is full already ... */
26462 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26463 /* ... mask the insn out. */
26465 ready_try[n_ready] = 1;
26467 if (data->ready_try_change)
26468 bitmap_set_bit (data->ready_try_change, n_ready);
26473 /* Prepare for a new round of multipass lookahead scheduling. */
26474 static void
26475 core2i7_first_cycle_multipass_begin (void *_data,
26476 signed char *ready_try, int n_ready,
26477 bool first_cycle_insn_p)
26479 ix86_first_cycle_multipass_data_t data
26480 = (ix86_first_cycle_multipass_data_t) _data;
26481 const_ix86_first_cycle_multipass_data_t prev_data
26482 = ix86_first_cycle_multipass_data;
26484 /* Restore the state from the end of the previous round. */
26485 data->ifetch_block_len = prev_data->ifetch_block_len;
26486 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26488 /* Filter instructions that cannot be issued on the current cycle due to
26489 decoder restrictions. */
26490 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26491 first_cycle_insn_p);
26494 /* INSN is being issued in the current solution. Account for its impact on
26495 the decoder model. */
26496 static void
26497 core2i7_first_cycle_multipass_issue (void *_data,
26498 signed char *ready_try, int n_ready,
26499 rtx insn, const void *_prev_data)
26501 ix86_first_cycle_multipass_data_t data
26502 = (ix86_first_cycle_multipass_data_t) _data;
26503 const_ix86_first_cycle_multipass_data_t prev_data
26504 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26506 int insn_size = min_insn_size (insn);
26508 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26509 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26510 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26511 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26513 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26514 if (!data->ready_try_change)
26516 data->ready_try_change = sbitmap_alloc (n_ready);
26517 data->ready_try_change_size = n_ready;
26519 else if (data->ready_try_change_size < n_ready)
26521 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26522 n_ready, 0);
26523 data->ready_try_change_size = n_ready;
26525 bitmap_clear (data->ready_try_change);
26527 /* Filter out insns from ready_try that the core will not be able to issue
26528 on the current cycle due to decoder restrictions. */
26529 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26530 false);
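/* Example of the bookkeeping above: issuing a 7-byte insn into a block
   that already held 2 insns and 6 bytes leaves 3 insns and 13 bytes in
   the current ifetch block, so only ready insns of at most 3 bytes
   survive the re-filtering for the rest of this cycle.  */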
26533 /* Revert the effect on ready_try. */
26534 static void
26535 core2i7_first_cycle_multipass_backtrack (const void *_data,
26536 signed char *ready_try,
26537 int n_ready ATTRIBUTE_UNUSED)
26539 const_ix86_first_cycle_multipass_data_t data
26540 = (const_ix86_first_cycle_multipass_data_t) _data;
26541 unsigned int i = 0;
26542 sbitmap_iterator sbi;
26544 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26545 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26547 ready_try[i] = 0;
26551 /* Save the result of multipass lookahead scheduling for the next round. */
26552 static void
26553 core2i7_first_cycle_multipass_end (const void *_data)
26555 const_ix86_first_cycle_multipass_data_t data
26556 = (const_ix86_first_cycle_multipass_data_t) _data;
26557 ix86_first_cycle_multipass_data_t next_data
26558 = ix86_first_cycle_multipass_data;
26560 if (data != NULL)
26562 next_data->ifetch_block_len = data->ifetch_block_len;
26563 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26567 /* Deallocate target data. */
26568 static void
26569 core2i7_first_cycle_multipass_fini (void *_data)
26571 ix86_first_cycle_multipass_data_t data
26572 = (ix86_first_cycle_multipass_data_t) _data;
26574 if (data->ready_try_change)
26576 sbitmap_free (data->ready_try_change);
26577 data->ready_try_change = NULL;
26578 data->ready_try_change_size = 0;
26582 /* Prepare for scheduling pass. */
26583 static void
26584 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26585 int verbose ATTRIBUTE_UNUSED,
26586 int max_uid ATTRIBUTE_UNUSED)
26588 /* Install scheduling hooks for the current CPU. Some of these hooks are used
26589 in time-critical parts of the scheduler, so we only set them up when
26590 they are actually used. */
26591 switch (ix86_tune)
26593 case PROCESSOR_CORE2:
26594 case PROCESSOR_NEHALEM:
26595 case PROCESSOR_SANDYBRIDGE:
26596 case PROCESSOR_HASWELL:
26597 /* Do not perform multipass scheduling for the pre-reload schedule,
26598 to save compile time. */
26599 if (reload_completed)
26601 targetm.sched.dfa_post_advance_cycle
26602 = core2i7_dfa_post_advance_cycle;
26603 targetm.sched.first_cycle_multipass_init
26604 = core2i7_first_cycle_multipass_init;
26605 targetm.sched.first_cycle_multipass_begin
26606 = core2i7_first_cycle_multipass_begin;
26607 targetm.sched.first_cycle_multipass_issue
26608 = core2i7_first_cycle_multipass_issue;
26609 targetm.sched.first_cycle_multipass_backtrack
26610 = core2i7_first_cycle_multipass_backtrack;
26611 targetm.sched.first_cycle_multipass_end
26612 = core2i7_first_cycle_multipass_end;
26613 targetm.sched.first_cycle_multipass_fini
26614 = core2i7_first_cycle_multipass_fini;
26616 /* Set decoder parameters. */
26617 core2i7_secondary_decoder_max_insn_size = 8;
26618 core2i7_ifetch_block_size = 16;
26619 core2i7_ifetch_block_max_insns = 6;
26620 break;
26622 /* ... Fall through ... */
26623 default:
26624 targetm.sched.dfa_post_advance_cycle = NULL;
26625 targetm.sched.first_cycle_multipass_init = NULL;
26626 targetm.sched.first_cycle_multipass_begin = NULL;
26627 targetm.sched.first_cycle_multipass_issue = NULL;
26628 targetm.sched.first_cycle_multipass_backtrack = NULL;
26629 targetm.sched.first_cycle_multipass_end = NULL;
26630 targetm.sched.first_cycle_multipass_fini = NULL;
26631 break;
26636 /* Compute the alignment given to a constant that is being placed in memory.
26637 EXP is the constant and ALIGN is the alignment that the object would
26638 ordinarily have.
26639 The value of this function is used instead of that alignment to align
26640 the object. */
26643 ix86_constant_alignment (tree exp, int align)
26645 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26646 || TREE_CODE (exp) == INTEGER_CST)
26648 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26649 return 64;
26650 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26651 return 128;
26653 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26654 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26655 return BITS_PER_WORD;
26657 return align;
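/* For example, a DFmode (double) constant placed in memory is given at
   least 64-bit alignment, and, when not optimizing for size, a string
   constant with TREE_STRING_LENGTH of at least 31 is aligned to a word
   boundary.  */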
26660 /* Compute the alignment for a static variable.
26661 TYPE is the data type, and ALIGN is the alignment that
26662 the object would ordinarily have. The value of this function is used
26663 instead of that alignment to align the object. */
26666 ix86_data_alignment (tree type, int align, bool opt)
26668 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26669 for symbols from other compilation units or symbols that don't need
26670 to bind locally. In order to preserve some ABI compatibility with
26671 those compilers, ensure we don't decrease alignment from what we
26672 used to assume. */
26674 int max_align_compat
26675 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26677 /* A data structure equal to or greater than the size of a cache line
26678 (64 bytes on the Pentium 4 and other recent Intel processors, including
26679 processors based on the Intel Core microarchitecture) should be aligned
26680 so that its base address is a multiple of the cache line size. */
26682 int max_align
26683 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26685 if (max_align < BITS_PER_WORD)
26686 max_align = BITS_PER_WORD;
26688 if (opt
26689 && AGGREGATE_TYPE_P (type)
26690 && TYPE_SIZE (type)
26691 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26693 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26694 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26695 && align < max_align_compat)
26696 align = max_align_compat;
26697 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26698 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26699 && align < max_align)
26700 align = max_align;
26703 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26704 to a 16-byte boundary. */
26705 if (TARGET_64BIT)
26707 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26708 && TYPE_SIZE (type)
26709 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26710 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26711 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26712 return 128;
26715 if (!opt)
26716 return align;
26718 if (TREE_CODE (type) == ARRAY_TYPE)
26720 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26721 return 64;
26722 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26723 return 128;
26725 else if (TREE_CODE (type) == COMPLEX_TYPE)
26728 if (TYPE_MODE (type) == DCmode && align < 64)
26729 return 64;
26730 if ((TYPE_MODE (type) == XCmode
26731 || TYPE_MODE (type) == TCmode) && align < 128)
26732 return 128;
26734 else if ((TREE_CODE (type) == RECORD_TYPE
26735 || TREE_CODE (type) == UNION_TYPE
26736 || TREE_CODE (type) == QUAL_UNION_TYPE)
26737 && TYPE_FIELDS (type))
26739 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26740 return 64;
26741 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26742 return 128;
26744 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26745 || TREE_CODE (type) == INTEGER_TYPE)
26747 if (TYPE_MODE (type) == DFmode && align < 64)
26748 return 64;
26749 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26750 return 128;
26753 return align;
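/* Roughly, for the OPT case above: an aggregate at least as large as a
   cache line is bumped to prefetch_block * 8 bits of alignment (capped
   by MAX_OFILE_ALIGNMENT), and on x86-64 an array of at least 128 bits
   gets at least 128-bit alignment per the ABI rule above.  */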
26756 /* Compute the alignment for a local variable or a stack slot. EXP is
26757 the data type or decl itself, MODE is the widest mode available, and
26758 ALIGN is the alignment that the object would ordinarily have. The
26759 value of this function is used instead of that alignment to align the
26760 object. */
26762 unsigned int
26763 ix86_local_alignment (tree exp, enum machine_mode mode,
26764 unsigned int align)
26766 tree type, decl;
26768 if (exp && DECL_P (exp))
26770 type = TREE_TYPE (exp);
26771 decl = exp;
26773 else
26775 type = exp;
26776 decl = NULL;
26779 /* Don't do dynamic stack realignment for long long objects with
26780 -mpreferred-stack-boundary=2. */
26781 if (!TARGET_64BIT
26782 && align == 64
26783 && ix86_preferred_stack_boundary < 64
26784 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26785 && (!type || !TYPE_USER_ALIGN (type))
26786 && (!decl || !DECL_USER_ALIGN (decl)))
26787 align = 32;
26789 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26790 register in MODE. We will return the larger of the XF and DF
26791 alignments. */
26792 if (!type)
26794 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26795 align = GET_MODE_ALIGNMENT (DFmode);
26796 return align;
26799 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26800 to a 16-byte boundary. The exact wording is:
26802 An array uses the same alignment as its elements, except that a local or
26803 global array variable of length at least 16 bytes or
26804 a C99 variable-length array variable always has alignment of at least 16 bytes.
26806 This was added to allow use of aligned SSE instructions on arrays. The
26807 rule is meant for static storage (where the compiler cannot do the analysis
26808 by itself). We follow it for automatic variables only when convenient.
26809 We fully control everything in the function being compiled, and functions
26810 from other units cannot rely on the alignment.
26812 Exclude the va_list type. It is the common case of a local array where
26813 we cannot benefit from the alignment.
26815 TODO: Probably one should optimize for size only when var is not escaping. */
26816 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26817 && TARGET_SSE)
26819 if (AGGREGATE_TYPE_P (type)
26820 && (va_list_type_node == NULL_TREE
26821 || (TYPE_MAIN_VARIANT (type)
26822 != TYPE_MAIN_VARIANT (va_list_type_node)))
26823 && TYPE_SIZE (type)
26824 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26825 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26826 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26827 return 128;
26829 if (TREE_CODE (type) == ARRAY_TYPE)
26831 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26832 return 64;
26833 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26834 return 128;
26836 else if (TREE_CODE (type) == COMPLEX_TYPE)
26838 if (TYPE_MODE (type) == DCmode && align < 64)
26839 return 64;
26840 if ((TYPE_MODE (type) == XCmode
26841 || TYPE_MODE (type) == TCmode) && align < 128)
26842 return 128;
26844 else if ((TREE_CODE (type) == RECORD_TYPE
26845 || TREE_CODE (type) == UNION_TYPE
26846 || TREE_CODE (type) == QUAL_UNION_TYPE)
26847 && TYPE_FIELDS (type))
26849 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26850 return 64;
26851 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26852 return 128;
26854 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26855 || TREE_CODE (type) == INTEGER_TYPE)
26858 if (TYPE_MODE (type) == DFmode && align < 64)
26859 return 64;
26860 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26861 return 128;
26863 return align;
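/* For example, on x86-64 with SSE enabled and when optimizing for
   speed, a local aggregate covered by the ABI rule quoted above (and
   which is not a va_list) is given 128-bit alignment so vector accesses
   to it can be aligned, and a local array of doubles gets at least
   64-bit alignment.  */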
26866 /* Compute the minimum required alignment for dynamic stack realignment
26867 purposes for a local variable, parameter or a stack slot. EXP is
26868 the data type or decl itself, MODE is its mode and ALIGN is the
26869 alignment that the object would ordinarily have. */
26871 unsigned int
26872 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26873 unsigned int align)
26875 tree type, decl;
26877 if (exp && DECL_P (exp))
26879 type = TREE_TYPE (exp);
26880 decl = exp;
26882 else
26884 type = exp;
26885 decl = NULL;
26888 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26889 return align;
26891 /* Don't do dynamic stack realignment for long long objects with
26892 -mpreferred-stack-boundary=2. */
26893 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26894 && (!type || !TYPE_USER_ALIGN (type))
26895 && (!decl || !DECL_USER_ALIGN (decl)))
26896 return 32;
26898 return align;
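/* Example: on ia32 with -mpreferred-stack-boundary=2, a long long
   (DImode) local without an explicit user alignment only requires
   32-bit alignment, so no dynamic stack realignment is forced for it.  */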
26901 /* Find a location for the static chain incoming to a nested function.
26902 This is a register, unless all free registers are used by arguments. */
26904 static rtx
26905 ix86_static_chain (const_tree fndecl, bool incoming_p)
26907 unsigned regno;
26909 if (!DECL_STATIC_CHAIN (fndecl))
26910 return NULL;
26912 if (TARGET_64BIT)
26914 /* We always use R10 in 64-bit mode. */
26915 regno = R10_REG;
26917 else
26919 tree fntype;
26920 unsigned int ccvt;
26922 /* By default in 32-bit mode we use ECX to pass the static chain. */
26923 regno = CX_REG;
26925 fntype = TREE_TYPE (fndecl);
26926 ccvt = ix86_get_callcvt (fntype);
26927 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26929 /* Fastcall functions use ecx/edx for arguments, which leaves
26930 us with EAX for the static chain.
26931 Thiscall functions use ecx for arguments, which also
26932 leaves us with EAX for the static chain. */
26933 regno = AX_REG;
26935 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26937 /* Thiscall functions use ecx for arguments, which leaves
26938 us with EAX and EDX for the static chain.
26939 We use EAX for ABI compatibility. */
26940 regno = AX_REG;
26942 else if (ix86_function_regparm (fntype, fndecl) == 3)
26944 /* For regparm 3, we have no free call-clobbered registers in
26945 which to store the static chain. In order to implement this,
26946 we have the trampoline push the static chain to the stack.
26947 However, we can't push a value below the return address when
26948 we call the nested function directly, so we have to use an
26949 alternate entry point. For this we use ESI, and have the
26950 alternate entry point push ESI, so that things appear the
26951 same once we're executing the nested function. */
26952 if (incoming_p)
26954 if (fndecl == current_function_decl)
26955 ix86_static_chain_on_stack = true;
26956 return gen_frame_mem (SImode,
26957 plus_constant (Pmode,
26958 arg_pointer_rtx, -8));
26960 regno = SI_REG;
26964 return gen_rtx_REG (Pmode, regno);
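/* Summary of the register choice above: R10 in 64-bit mode; ECX for the
   default 32-bit conventions; EAX for fastcall and thiscall; and for
   regparm(3) the incoming chain is found on the stack, with ESI used by
   the alternate entry point that pushes it.  */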
26967 /* Emit RTL insns to initialize the variable parts of a trampoline.
26968 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26969 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26970 to be passed to the target function. */
26972 static void
26973 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26975 rtx mem, fnaddr;
26976 int opcode;
26977 int offset = 0;
26979 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26981 if (TARGET_64BIT)
26983 int size;
26985 /* Load the function address into r11. Try to load the address using
26986 the shorter movl instead of movabs. We may want to support
26987 movq for kernel mode, but the kernel does not use trampolines at
26988 the moment. FNADDR is a 32-bit address and may not be in
26989 DImode when ptr_mode == SImode. Always use movl in this
26990 case. */
26991 if (ptr_mode == SImode
26992 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26994 fnaddr = copy_addr_to_reg (fnaddr);
26996 mem = adjust_address (m_tramp, HImode, offset);
26997 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26999 mem = adjust_address (m_tramp, SImode, offset + 2);
27000 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27001 offset += 6;
27003 else
27005 mem = adjust_address (m_tramp, HImode, offset);
27006 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27008 mem = adjust_address (m_tramp, DImode, offset + 2);
27009 emit_move_insn (mem, fnaddr);
27010 offset += 10;
27013 /* Load static chain using movabs to r10. Use the shorter movl
27014 instead of movabs when ptr_mode == SImode. */
27015 if (ptr_mode == SImode)
27017 opcode = 0xba41;
27018 size = 6;
27020 else
27022 opcode = 0xba49;
27023 size = 10;
27026 mem = adjust_address (m_tramp, HImode, offset);
27027 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27029 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27030 emit_move_insn (mem, chain_value);
27031 offset += size;
27033 /* Jump to r11; the last (unused) byte is a nop, only there to
27034 pad the write out to a single 32-bit store. */
27035 mem = adjust_address (m_tramp, SImode, offset);
27036 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27037 offset += 4;
27039 else
27041 rtx disp, chain;
27043 /* Depending on the static chain location, either load a register
27044 with a constant, or push the constant to the stack. All of the
27045 instructions are the same size. */
27046 chain = ix86_static_chain (fndecl, true);
27047 if (REG_P (chain))
27049 switch (REGNO (chain))
27051 case AX_REG:
27052 opcode = 0xb8; break;
27053 case CX_REG:
27054 opcode = 0xb9; break;
27055 default:
27056 gcc_unreachable ();
27059 else
27060 opcode = 0x68;
27062 mem = adjust_address (m_tramp, QImode, offset);
27063 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27065 mem = adjust_address (m_tramp, SImode, offset + 1);
27066 emit_move_insn (mem, chain_value);
27067 offset += 5;
27069 mem = adjust_address (m_tramp, QImode, offset);
27070 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27072 mem = adjust_address (m_tramp, SImode, offset + 1);
27074 /* Compute the offset from the end of the jmp to the target function.
27075 When the trampoline stores the static chain on the stack, we need
27076 to skip the first insn, which pushes the (call-saved) register
27077 static chain; this push is 1 byte. */
27078 offset += 5;
27079 disp = expand_binop (SImode, sub_optab, fnaddr,
27080 plus_constant (Pmode, XEXP (m_tramp, 0),
27081 offset - (MEM_P (chain) ? 1 : 0)),
27082 NULL_RTX, 1, OPTAB_DIRECT);
27083 emit_move_insn (mem, disp);
27086 gcc_assert (offset <= TRAMPOLINE_SIZE);
27088 #ifdef HAVE_ENABLE_EXECUTE_STACK
27089 #ifdef CHECK_EXECUTE_STACK_ENABLED
27090 if (CHECK_EXECUTE_STACK_ENABLED)
27091 #endif
27092 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27093 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27094 #endif
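/* For reference, the 64-bit trampoline emitted above (movabs case) is
   laid out roughly as:
     49 bb <8-byte fnaddr>     movabs $fnaddr, %r11
     49 ba <8-byte chain>      movabs $chain,  %r10
     49 ff e3                  jmpq   *%r11
     90                        nop (pads the final 32-bit store)
   i.e. 24 bytes total.  The 32-bit variant is a mov-or-push of the
   chain value (opcode b8/b9/68 plus imm32) followed by e9 <rel32>,
   a direct jmp to the target.  */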
27097 /* The following file contains several enumerations and data structures
27098 built from the definitions in i386-builtin-types.def. */
27100 #include "i386-builtin-types.inc"
27102 /* Table for the ix86 builtin non-function types. */
27103 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27105 /* Retrieve an element from the above table, building some of
27106 the types lazily. */
27108 static tree
27109 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27111 unsigned int index;
27112 tree type, itype;
27114 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27116 type = ix86_builtin_type_tab[(int) tcode];
27117 if (type != NULL)
27118 return type;
27120 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27121 if (tcode <= IX86_BT_LAST_VECT)
27123 enum machine_mode mode;
27125 index = tcode - IX86_BT_LAST_PRIM - 1;
27126 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27127 mode = ix86_builtin_type_vect_mode[index];
27129 type = build_vector_type_for_mode (itype, mode);
27131 else
27133 int quals;
27135 index = tcode - IX86_BT_LAST_VECT - 1;
27136 if (tcode <= IX86_BT_LAST_PTR)
27137 quals = TYPE_UNQUALIFIED;
27138 else
27139 quals = TYPE_QUAL_CONST;
27141 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27142 if (quals != TYPE_UNQUALIFIED)
27143 itype = build_qualified_type (itype, quals);
27145 type = build_pointer_type (itype);
27148 ix86_builtin_type_tab[(int) tcode] = type;
27149 return type;
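/* Example of the lazy construction above: a vector code whose recorded
   mode is, say, V4SFmode is built on demand with
   build_vector_type_for_mode on its element type, while a pointer code
   past IX86_BT_LAST_VECT becomes a (possibly const-qualified) pointer
   to its recorded base type.  Either way the result is cached in
   ix86_builtin_type_tab.  */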
27152 /* Table for the ix86 builtin function types. */
27153 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27155 /* Retrieve an element from the above table, building some of
27156 the types lazily. */
27158 static tree
27159 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27161 tree type;
27163 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27165 type = ix86_builtin_func_type_tab[(int) tcode];
27166 if (type != NULL)
27167 return type;
27169 if (tcode <= IX86_BT_LAST_FUNC)
27171 unsigned start = ix86_builtin_func_start[(int) tcode];
27172 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27173 tree rtype, atype, args = void_list_node;
27174 unsigned i;
27176 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27177 for (i = after - 1; i > start; --i)
27179 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27180 args = tree_cons (NULL, atype, args);
27183 type = build_function_type (rtype, args);
27185 else
27187 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27188 enum ix86_builtin_func_type icode;
27190 icode = ix86_builtin_func_alias_base[index];
27191 type = ix86_get_builtin_func_type (icode);
27194 ix86_builtin_func_type_tab[(int) tcode] = type;
27195 return type;
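/* Example of the lazy construction above: for a plain function code the
   prototype is assembled from ix86_builtin_func_args, whose first entry
   for that code is the return type and whose remaining entries are the
   argument types (walked in reverse to build the TREE_LIST); an alias
   code simply reuses the prototype of its base code.  Results are
   cached in ix86_builtin_func_type_tab.  */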
27199 /* Codes for all the SSE/MMX builtins. */
27200 enum ix86_builtins
27202 IX86_BUILTIN_ADDPS,
27203 IX86_BUILTIN_ADDSS,
27204 IX86_BUILTIN_DIVPS,
27205 IX86_BUILTIN_DIVSS,
27206 IX86_BUILTIN_MULPS,
27207 IX86_BUILTIN_MULSS,
27208 IX86_BUILTIN_SUBPS,
27209 IX86_BUILTIN_SUBSS,
27211 IX86_BUILTIN_CMPEQPS,
27212 IX86_BUILTIN_CMPLTPS,
27213 IX86_BUILTIN_CMPLEPS,
27214 IX86_BUILTIN_CMPGTPS,
27215 IX86_BUILTIN_CMPGEPS,
27216 IX86_BUILTIN_CMPNEQPS,
27217 IX86_BUILTIN_CMPNLTPS,
27218 IX86_BUILTIN_CMPNLEPS,
27219 IX86_BUILTIN_CMPNGTPS,
27220 IX86_BUILTIN_CMPNGEPS,
27221 IX86_BUILTIN_CMPORDPS,
27222 IX86_BUILTIN_CMPUNORDPS,
27223 IX86_BUILTIN_CMPEQSS,
27224 IX86_BUILTIN_CMPLTSS,
27225 IX86_BUILTIN_CMPLESS,
27226 IX86_BUILTIN_CMPNEQSS,
27227 IX86_BUILTIN_CMPNLTSS,
27228 IX86_BUILTIN_CMPNLESS,
27229 IX86_BUILTIN_CMPORDSS,
27230 IX86_BUILTIN_CMPUNORDSS,
27232 IX86_BUILTIN_COMIEQSS,
27233 IX86_BUILTIN_COMILTSS,
27234 IX86_BUILTIN_COMILESS,
27235 IX86_BUILTIN_COMIGTSS,
27236 IX86_BUILTIN_COMIGESS,
27237 IX86_BUILTIN_COMINEQSS,
27238 IX86_BUILTIN_UCOMIEQSS,
27239 IX86_BUILTIN_UCOMILTSS,
27240 IX86_BUILTIN_UCOMILESS,
27241 IX86_BUILTIN_UCOMIGTSS,
27242 IX86_BUILTIN_UCOMIGESS,
27243 IX86_BUILTIN_UCOMINEQSS,
27245 IX86_BUILTIN_CVTPI2PS,
27246 IX86_BUILTIN_CVTPS2PI,
27247 IX86_BUILTIN_CVTSI2SS,
27248 IX86_BUILTIN_CVTSI642SS,
27249 IX86_BUILTIN_CVTSS2SI,
27250 IX86_BUILTIN_CVTSS2SI64,
27251 IX86_BUILTIN_CVTTPS2PI,
27252 IX86_BUILTIN_CVTTSS2SI,
27253 IX86_BUILTIN_CVTTSS2SI64,
27255 IX86_BUILTIN_MAXPS,
27256 IX86_BUILTIN_MAXSS,
27257 IX86_BUILTIN_MINPS,
27258 IX86_BUILTIN_MINSS,
27260 IX86_BUILTIN_LOADUPS,
27261 IX86_BUILTIN_STOREUPS,
27262 IX86_BUILTIN_MOVSS,
27264 IX86_BUILTIN_MOVHLPS,
27265 IX86_BUILTIN_MOVLHPS,
27266 IX86_BUILTIN_LOADHPS,
27267 IX86_BUILTIN_LOADLPS,
27268 IX86_BUILTIN_STOREHPS,
27269 IX86_BUILTIN_STORELPS,
27271 IX86_BUILTIN_MASKMOVQ,
27272 IX86_BUILTIN_MOVMSKPS,
27273 IX86_BUILTIN_PMOVMSKB,
27275 IX86_BUILTIN_MOVNTPS,
27276 IX86_BUILTIN_MOVNTQ,
27278 IX86_BUILTIN_LOADDQU,
27279 IX86_BUILTIN_STOREDQU,
27281 IX86_BUILTIN_PACKSSWB,
27282 IX86_BUILTIN_PACKSSDW,
27283 IX86_BUILTIN_PACKUSWB,
27285 IX86_BUILTIN_PADDB,
27286 IX86_BUILTIN_PADDW,
27287 IX86_BUILTIN_PADDD,
27288 IX86_BUILTIN_PADDQ,
27289 IX86_BUILTIN_PADDSB,
27290 IX86_BUILTIN_PADDSW,
27291 IX86_BUILTIN_PADDUSB,
27292 IX86_BUILTIN_PADDUSW,
27293 IX86_BUILTIN_PSUBB,
27294 IX86_BUILTIN_PSUBW,
27295 IX86_BUILTIN_PSUBD,
27296 IX86_BUILTIN_PSUBQ,
27297 IX86_BUILTIN_PSUBSB,
27298 IX86_BUILTIN_PSUBSW,
27299 IX86_BUILTIN_PSUBUSB,
27300 IX86_BUILTIN_PSUBUSW,
27302 IX86_BUILTIN_PAND,
27303 IX86_BUILTIN_PANDN,
27304 IX86_BUILTIN_POR,
27305 IX86_BUILTIN_PXOR,
27307 IX86_BUILTIN_PAVGB,
27308 IX86_BUILTIN_PAVGW,
27310 IX86_BUILTIN_PCMPEQB,
27311 IX86_BUILTIN_PCMPEQW,
27312 IX86_BUILTIN_PCMPEQD,
27313 IX86_BUILTIN_PCMPGTB,
27314 IX86_BUILTIN_PCMPGTW,
27315 IX86_BUILTIN_PCMPGTD,
27317 IX86_BUILTIN_PMADDWD,
27319 IX86_BUILTIN_PMAXSW,
27320 IX86_BUILTIN_PMAXUB,
27321 IX86_BUILTIN_PMINSW,
27322 IX86_BUILTIN_PMINUB,
27324 IX86_BUILTIN_PMULHUW,
27325 IX86_BUILTIN_PMULHW,
27326 IX86_BUILTIN_PMULLW,
27328 IX86_BUILTIN_PSADBW,
27329 IX86_BUILTIN_PSHUFW,
27331 IX86_BUILTIN_PSLLW,
27332 IX86_BUILTIN_PSLLD,
27333 IX86_BUILTIN_PSLLQ,
27334 IX86_BUILTIN_PSRAW,
27335 IX86_BUILTIN_PSRAD,
27336 IX86_BUILTIN_PSRLW,
27337 IX86_BUILTIN_PSRLD,
27338 IX86_BUILTIN_PSRLQ,
27339 IX86_BUILTIN_PSLLWI,
27340 IX86_BUILTIN_PSLLDI,
27341 IX86_BUILTIN_PSLLQI,
27342 IX86_BUILTIN_PSRAWI,
27343 IX86_BUILTIN_PSRADI,
27344 IX86_BUILTIN_PSRLWI,
27345 IX86_BUILTIN_PSRLDI,
27346 IX86_BUILTIN_PSRLQI,
27348 IX86_BUILTIN_PUNPCKHBW,
27349 IX86_BUILTIN_PUNPCKHWD,
27350 IX86_BUILTIN_PUNPCKHDQ,
27351 IX86_BUILTIN_PUNPCKLBW,
27352 IX86_BUILTIN_PUNPCKLWD,
27353 IX86_BUILTIN_PUNPCKLDQ,
27355 IX86_BUILTIN_SHUFPS,
27357 IX86_BUILTIN_RCPPS,
27358 IX86_BUILTIN_RCPSS,
27359 IX86_BUILTIN_RSQRTPS,
27360 IX86_BUILTIN_RSQRTPS_NR,
27361 IX86_BUILTIN_RSQRTSS,
27362 IX86_BUILTIN_RSQRTF,
27363 IX86_BUILTIN_SQRTPS,
27364 IX86_BUILTIN_SQRTPS_NR,
27365 IX86_BUILTIN_SQRTSS,
27367 IX86_BUILTIN_UNPCKHPS,
27368 IX86_BUILTIN_UNPCKLPS,
27370 IX86_BUILTIN_ANDPS,
27371 IX86_BUILTIN_ANDNPS,
27372 IX86_BUILTIN_ORPS,
27373 IX86_BUILTIN_XORPS,
27375 IX86_BUILTIN_EMMS,
27376 IX86_BUILTIN_LDMXCSR,
27377 IX86_BUILTIN_STMXCSR,
27378 IX86_BUILTIN_SFENCE,
27380 IX86_BUILTIN_FXSAVE,
27381 IX86_BUILTIN_FXRSTOR,
27382 IX86_BUILTIN_FXSAVE64,
27383 IX86_BUILTIN_FXRSTOR64,
27385 IX86_BUILTIN_XSAVE,
27386 IX86_BUILTIN_XRSTOR,
27387 IX86_BUILTIN_XSAVE64,
27388 IX86_BUILTIN_XRSTOR64,
27390 IX86_BUILTIN_XSAVEOPT,
27391 IX86_BUILTIN_XSAVEOPT64,
27393 /* 3DNow! Original */
27394 IX86_BUILTIN_FEMMS,
27395 IX86_BUILTIN_PAVGUSB,
27396 IX86_BUILTIN_PF2ID,
27397 IX86_BUILTIN_PFACC,
27398 IX86_BUILTIN_PFADD,
27399 IX86_BUILTIN_PFCMPEQ,
27400 IX86_BUILTIN_PFCMPGE,
27401 IX86_BUILTIN_PFCMPGT,
27402 IX86_BUILTIN_PFMAX,
27403 IX86_BUILTIN_PFMIN,
27404 IX86_BUILTIN_PFMUL,
27405 IX86_BUILTIN_PFRCP,
27406 IX86_BUILTIN_PFRCPIT1,
27407 IX86_BUILTIN_PFRCPIT2,
27408 IX86_BUILTIN_PFRSQIT1,
27409 IX86_BUILTIN_PFRSQRT,
27410 IX86_BUILTIN_PFSUB,
27411 IX86_BUILTIN_PFSUBR,
27412 IX86_BUILTIN_PI2FD,
27413 IX86_BUILTIN_PMULHRW,
27415 /* 3DNow! Athlon Extensions */
27416 IX86_BUILTIN_PF2IW,
27417 IX86_BUILTIN_PFNACC,
27418 IX86_BUILTIN_PFPNACC,
27419 IX86_BUILTIN_PI2FW,
27420 IX86_BUILTIN_PSWAPDSI,
27421 IX86_BUILTIN_PSWAPDSF,
27423 /* SSE2 */
27424 IX86_BUILTIN_ADDPD,
27425 IX86_BUILTIN_ADDSD,
27426 IX86_BUILTIN_DIVPD,
27427 IX86_BUILTIN_DIVSD,
27428 IX86_BUILTIN_MULPD,
27429 IX86_BUILTIN_MULSD,
27430 IX86_BUILTIN_SUBPD,
27431 IX86_BUILTIN_SUBSD,
27433 IX86_BUILTIN_CMPEQPD,
27434 IX86_BUILTIN_CMPLTPD,
27435 IX86_BUILTIN_CMPLEPD,
27436 IX86_BUILTIN_CMPGTPD,
27437 IX86_BUILTIN_CMPGEPD,
27438 IX86_BUILTIN_CMPNEQPD,
27439 IX86_BUILTIN_CMPNLTPD,
27440 IX86_BUILTIN_CMPNLEPD,
27441 IX86_BUILTIN_CMPNGTPD,
27442 IX86_BUILTIN_CMPNGEPD,
27443 IX86_BUILTIN_CMPORDPD,
27444 IX86_BUILTIN_CMPUNORDPD,
27445 IX86_BUILTIN_CMPEQSD,
27446 IX86_BUILTIN_CMPLTSD,
27447 IX86_BUILTIN_CMPLESD,
27448 IX86_BUILTIN_CMPNEQSD,
27449 IX86_BUILTIN_CMPNLTSD,
27450 IX86_BUILTIN_CMPNLESD,
27451 IX86_BUILTIN_CMPORDSD,
27452 IX86_BUILTIN_CMPUNORDSD,
27454 IX86_BUILTIN_COMIEQSD,
27455 IX86_BUILTIN_COMILTSD,
27456 IX86_BUILTIN_COMILESD,
27457 IX86_BUILTIN_COMIGTSD,
27458 IX86_BUILTIN_COMIGESD,
27459 IX86_BUILTIN_COMINEQSD,
27460 IX86_BUILTIN_UCOMIEQSD,
27461 IX86_BUILTIN_UCOMILTSD,
27462 IX86_BUILTIN_UCOMILESD,
27463 IX86_BUILTIN_UCOMIGTSD,
27464 IX86_BUILTIN_UCOMIGESD,
27465 IX86_BUILTIN_UCOMINEQSD,
27467 IX86_BUILTIN_MAXPD,
27468 IX86_BUILTIN_MAXSD,
27469 IX86_BUILTIN_MINPD,
27470 IX86_BUILTIN_MINSD,
27472 IX86_BUILTIN_ANDPD,
27473 IX86_BUILTIN_ANDNPD,
27474 IX86_BUILTIN_ORPD,
27475 IX86_BUILTIN_XORPD,
27477 IX86_BUILTIN_SQRTPD,
27478 IX86_BUILTIN_SQRTSD,
27480 IX86_BUILTIN_UNPCKHPD,
27481 IX86_BUILTIN_UNPCKLPD,
27483 IX86_BUILTIN_SHUFPD,
27485 IX86_BUILTIN_LOADUPD,
27486 IX86_BUILTIN_STOREUPD,
27487 IX86_BUILTIN_MOVSD,
27489 IX86_BUILTIN_LOADHPD,
27490 IX86_BUILTIN_LOADLPD,
27492 IX86_BUILTIN_CVTDQ2PD,
27493 IX86_BUILTIN_CVTDQ2PS,
27495 IX86_BUILTIN_CVTPD2DQ,
27496 IX86_BUILTIN_CVTPD2PI,
27497 IX86_BUILTIN_CVTPD2PS,
27498 IX86_BUILTIN_CVTTPD2DQ,
27499 IX86_BUILTIN_CVTTPD2PI,
27501 IX86_BUILTIN_CVTPI2PD,
27502 IX86_BUILTIN_CVTSI2SD,
27503 IX86_BUILTIN_CVTSI642SD,
27505 IX86_BUILTIN_CVTSD2SI,
27506 IX86_BUILTIN_CVTSD2SI64,
27507 IX86_BUILTIN_CVTSD2SS,
27508 IX86_BUILTIN_CVTSS2SD,
27509 IX86_BUILTIN_CVTTSD2SI,
27510 IX86_BUILTIN_CVTTSD2SI64,
27512 IX86_BUILTIN_CVTPS2DQ,
27513 IX86_BUILTIN_CVTPS2PD,
27514 IX86_BUILTIN_CVTTPS2DQ,
27516 IX86_BUILTIN_MOVNTI,
27517 IX86_BUILTIN_MOVNTI64,
27518 IX86_BUILTIN_MOVNTPD,
27519 IX86_BUILTIN_MOVNTDQ,
27521 IX86_BUILTIN_MOVQ128,
27523 /* SSE2 MMX */
27524 IX86_BUILTIN_MASKMOVDQU,
27525 IX86_BUILTIN_MOVMSKPD,
27526 IX86_BUILTIN_PMOVMSKB128,
27528 IX86_BUILTIN_PACKSSWB128,
27529 IX86_BUILTIN_PACKSSDW128,
27530 IX86_BUILTIN_PACKUSWB128,
27532 IX86_BUILTIN_PADDB128,
27533 IX86_BUILTIN_PADDW128,
27534 IX86_BUILTIN_PADDD128,
27535 IX86_BUILTIN_PADDQ128,
27536 IX86_BUILTIN_PADDSB128,
27537 IX86_BUILTIN_PADDSW128,
27538 IX86_BUILTIN_PADDUSB128,
27539 IX86_BUILTIN_PADDUSW128,
27540 IX86_BUILTIN_PSUBB128,
27541 IX86_BUILTIN_PSUBW128,
27542 IX86_BUILTIN_PSUBD128,
27543 IX86_BUILTIN_PSUBQ128,
27544 IX86_BUILTIN_PSUBSB128,
27545 IX86_BUILTIN_PSUBSW128,
27546 IX86_BUILTIN_PSUBUSB128,
27547 IX86_BUILTIN_PSUBUSW128,
27549 IX86_BUILTIN_PAND128,
27550 IX86_BUILTIN_PANDN128,
27551 IX86_BUILTIN_POR128,
27552 IX86_BUILTIN_PXOR128,
27554 IX86_BUILTIN_PAVGB128,
27555 IX86_BUILTIN_PAVGW128,
27557 IX86_BUILTIN_PCMPEQB128,
27558 IX86_BUILTIN_PCMPEQW128,
27559 IX86_BUILTIN_PCMPEQD128,
27560 IX86_BUILTIN_PCMPGTB128,
27561 IX86_BUILTIN_PCMPGTW128,
27562 IX86_BUILTIN_PCMPGTD128,
27564 IX86_BUILTIN_PMADDWD128,
27566 IX86_BUILTIN_PMAXSW128,
27567 IX86_BUILTIN_PMAXUB128,
27568 IX86_BUILTIN_PMINSW128,
27569 IX86_BUILTIN_PMINUB128,
27571 IX86_BUILTIN_PMULUDQ,
27572 IX86_BUILTIN_PMULUDQ128,
27573 IX86_BUILTIN_PMULHUW128,
27574 IX86_BUILTIN_PMULHW128,
27575 IX86_BUILTIN_PMULLW128,
27577 IX86_BUILTIN_PSADBW128,
27578 IX86_BUILTIN_PSHUFHW,
27579 IX86_BUILTIN_PSHUFLW,
27580 IX86_BUILTIN_PSHUFD,
27582 IX86_BUILTIN_PSLLDQI128,
27583 IX86_BUILTIN_PSLLWI128,
27584 IX86_BUILTIN_PSLLDI128,
27585 IX86_BUILTIN_PSLLQI128,
27586 IX86_BUILTIN_PSRAWI128,
27587 IX86_BUILTIN_PSRADI128,
27588 IX86_BUILTIN_PSRLDQI128,
27589 IX86_BUILTIN_PSRLWI128,
27590 IX86_BUILTIN_PSRLDI128,
27591 IX86_BUILTIN_PSRLQI128,
27593 IX86_BUILTIN_PSLLDQ128,
27594 IX86_BUILTIN_PSLLW128,
27595 IX86_BUILTIN_PSLLD128,
27596 IX86_BUILTIN_PSLLQ128,
27597 IX86_BUILTIN_PSRAW128,
27598 IX86_BUILTIN_PSRAD128,
27599 IX86_BUILTIN_PSRLW128,
27600 IX86_BUILTIN_PSRLD128,
27601 IX86_BUILTIN_PSRLQ128,
27603 IX86_BUILTIN_PUNPCKHBW128,
27604 IX86_BUILTIN_PUNPCKHWD128,
27605 IX86_BUILTIN_PUNPCKHDQ128,
27606 IX86_BUILTIN_PUNPCKHQDQ128,
27607 IX86_BUILTIN_PUNPCKLBW128,
27608 IX86_BUILTIN_PUNPCKLWD128,
27609 IX86_BUILTIN_PUNPCKLDQ128,
27610 IX86_BUILTIN_PUNPCKLQDQ128,
27612 IX86_BUILTIN_CLFLUSH,
27613 IX86_BUILTIN_MFENCE,
27614 IX86_BUILTIN_LFENCE,
27615 IX86_BUILTIN_PAUSE,
27617 IX86_BUILTIN_FNSTENV,
27618 IX86_BUILTIN_FLDENV,
27619 IX86_BUILTIN_FNSTSW,
27620 IX86_BUILTIN_FNCLEX,
27622 IX86_BUILTIN_BSRSI,
27623 IX86_BUILTIN_BSRDI,
27624 IX86_BUILTIN_RDPMC,
27625 IX86_BUILTIN_RDTSC,
27626 IX86_BUILTIN_RDTSCP,
27627 IX86_BUILTIN_ROLQI,
27628 IX86_BUILTIN_ROLHI,
27629 IX86_BUILTIN_RORQI,
27630 IX86_BUILTIN_RORHI,
27632 /* SSE3. */
27633 IX86_BUILTIN_ADDSUBPS,
27634 IX86_BUILTIN_HADDPS,
27635 IX86_BUILTIN_HSUBPS,
27636 IX86_BUILTIN_MOVSHDUP,
27637 IX86_BUILTIN_MOVSLDUP,
27638 IX86_BUILTIN_ADDSUBPD,
27639 IX86_BUILTIN_HADDPD,
27640 IX86_BUILTIN_HSUBPD,
27641 IX86_BUILTIN_LDDQU,
27643 IX86_BUILTIN_MONITOR,
27644 IX86_BUILTIN_MWAIT,
27646 /* SSSE3. */
27647 IX86_BUILTIN_PHADDW,
27648 IX86_BUILTIN_PHADDD,
27649 IX86_BUILTIN_PHADDSW,
27650 IX86_BUILTIN_PHSUBW,
27651 IX86_BUILTIN_PHSUBD,
27652 IX86_BUILTIN_PHSUBSW,
27653 IX86_BUILTIN_PMADDUBSW,
27654 IX86_BUILTIN_PMULHRSW,
27655 IX86_BUILTIN_PSHUFB,
27656 IX86_BUILTIN_PSIGNB,
27657 IX86_BUILTIN_PSIGNW,
27658 IX86_BUILTIN_PSIGND,
27659 IX86_BUILTIN_PALIGNR,
27660 IX86_BUILTIN_PABSB,
27661 IX86_BUILTIN_PABSW,
27662 IX86_BUILTIN_PABSD,
27664 IX86_BUILTIN_PHADDW128,
27665 IX86_BUILTIN_PHADDD128,
27666 IX86_BUILTIN_PHADDSW128,
27667 IX86_BUILTIN_PHSUBW128,
27668 IX86_BUILTIN_PHSUBD128,
27669 IX86_BUILTIN_PHSUBSW128,
27670 IX86_BUILTIN_PMADDUBSW128,
27671 IX86_BUILTIN_PMULHRSW128,
27672 IX86_BUILTIN_PSHUFB128,
27673 IX86_BUILTIN_PSIGNB128,
27674 IX86_BUILTIN_PSIGNW128,
27675 IX86_BUILTIN_PSIGND128,
27676 IX86_BUILTIN_PALIGNR128,
27677 IX86_BUILTIN_PABSB128,
27678 IX86_BUILTIN_PABSW128,
27679 IX86_BUILTIN_PABSD128,
27681 /* AMDFAM10 - SSE4A New Instructions. */
27682 IX86_BUILTIN_MOVNTSD,
27683 IX86_BUILTIN_MOVNTSS,
27684 IX86_BUILTIN_EXTRQI,
27685 IX86_BUILTIN_EXTRQ,
27686 IX86_BUILTIN_INSERTQI,
27687 IX86_BUILTIN_INSERTQ,
27689 /* SSE4.1. */
27690 IX86_BUILTIN_BLENDPD,
27691 IX86_BUILTIN_BLENDPS,
27692 IX86_BUILTIN_BLENDVPD,
27693 IX86_BUILTIN_BLENDVPS,
27694 IX86_BUILTIN_PBLENDVB128,
27695 IX86_BUILTIN_PBLENDW128,
27697 IX86_BUILTIN_DPPD,
27698 IX86_BUILTIN_DPPS,
27700 IX86_BUILTIN_INSERTPS128,
27702 IX86_BUILTIN_MOVNTDQA,
27703 IX86_BUILTIN_MPSADBW128,
27704 IX86_BUILTIN_PACKUSDW128,
27705 IX86_BUILTIN_PCMPEQQ,
27706 IX86_BUILTIN_PHMINPOSUW128,
27708 IX86_BUILTIN_PMAXSB128,
27709 IX86_BUILTIN_PMAXSD128,
27710 IX86_BUILTIN_PMAXUD128,
27711 IX86_BUILTIN_PMAXUW128,
27713 IX86_BUILTIN_PMINSB128,
27714 IX86_BUILTIN_PMINSD128,
27715 IX86_BUILTIN_PMINUD128,
27716 IX86_BUILTIN_PMINUW128,
27718 IX86_BUILTIN_PMOVSXBW128,
27719 IX86_BUILTIN_PMOVSXBD128,
27720 IX86_BUILTIN_PMOVSXBQ128,
27721 IX86_BUILTIN_PMOVSXWD128,
27722 IX86_BUILTIN_PMOVSXWQ128,
27723 IX86_BUILTIN_PMOVSXDQ128,
27725 IX86_BUILTIN_PMOVZXBW128,
27726 IX86_BUILTIN_PMOVZXBD128,
27727 IX86_BUILTIN_PMOVZXBQ128,
27728 IX86_BUILTIN_PMOVZXWD128,
27729 IX86_BUILTIN_PMOVZXWQ128,
27730 IX86_BUILTIN_PMOVZXDQ128,
27732 IX86_BUILTIN_PMULDQ128,
27733 IX86_BUILTIN_PMULLD128,
27735 IX86_BUILTIN_ROUNDSD,
27736 IX86_BUILTIN_ROUNDSS,
27738 IX86_BUILTIN_ROUNDPD,
27739 IX86_BUILTIN_ROUNDPS,
27741 IX86_BUILTIN_FLOORPD,
27742 IX86_BUILTIN_CEILPD,
27743 IX86_BUILTIN_TRUNCPD,
27744 IX86_BUILTIN_RINTPD,
27745 IX86_BUILTIN_ROUNDPD_AZ,
27747 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27748 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27749 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27751 IX86_BUILTIN_FLOORPS,
27752 IX86_BUILTIN_CEILPS,
27753 IX86_BUILTIN_TRUNCPS,
27754 IX86_BUILTIN_RINTPS,
27755 IX86_BUILTIN_ROUNDPS_AZ,
27757 IX86_BUILTIN_FLOORPS_SFIX,
27758 IX86_BUILTIN_CEILPS_SFIX,
27759 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27761 IX86_BUILTIN_PTESTZ,
27762 IX86_BUILTIN_PTESTC,
27763 IX86_BUILTIN_PTESTNZC,
27765 IX86_BUILTIN_VEC_INIT_V2SI,
27766 IX86_BUILTIN_VEC_INIT_V4HI,
27767 IX86_BUILTIN_VEC_INIT_V8QI,
27768 IX86_BUILTIN_VEC_EXT_V2DF,
27769 IX86_BUILTIN_VEC_EXT_V2DI,
27770 IX86_BUILTIN_VEC_EXT_V4SF,
27771 IX86_BUILTIN_VEC_EXT_V4SI,
27772 IX86_BUILTIN_VEC_EXT_V8HI,
27773 IX86_BUILTIN_VEC_EXT_V2SI,
27774 IX86_BUILTIN_VEC_EXT_V4HI,
27775 IX86_BUILTIN_VEC_EXT_V16QI,
27776 IX86_BUILTIN_VEC_SET_V2DI,
27777 IX86_BUILTIN_VEC_SET_V4SF,
27778 IX86_BUILTIN_VEC_SET_V4SI,
27779 IX86_BUILTIN_VEC_SET_V8HI,
27780 IX86_BUILTIN_VEC_SET_V4HI,
27781 IX86_BUILTIN_VEC_SET_V16QI,
27783 IX86_BUILTIN_VEC_PACK_SFIX,
27784 IX86_BUILTIN_VEC_PACK_SFIX256,
27786 /* SSE4.2. */
27787 IX86_BUILTIN_CRC32QI,
27788 IX86_BUILTIN_CRC32HI,
27789 IX86_BUILTIN_CRC32SI,
27790 IX86_BUILTIN_CRC32DI,
27792 IX86_BUILTIN_PCMPESTRI128,
27793 IX86_BUILTIN_PCMPESTRM128,
27794 IX86_BUILTIN_PCMPESTRA128,
27795 IX86_BUILTIN_PCMPESTRC128,
27796 IX86_BUILTIN_PCMPESTRO128,
27797 IX86_BUILTIN_PCMPESTRS128,
27798 IX86_BUILTIN_PCMPESTRZ128,
27799 IX86_BUILTIN_PCMPISTRI128,
27800 IX86_BUILTIN_PCMPISTRM128,
27801 IX86_BUILTIN_PCMPISTRA128,
27802 IX86_BUILTIN_PCMPISTRC128,
27803 IX86_BUILTIN_PCMPISTRO128,
27804 IX86_BUILTIN_PCMPISTRS128,
27805 IX86_BUILTIN_PCMPISTRZ128,
27807 IX86_BUILTIN_PCMPGTQ,
27809 /* AES instructions */
27810 IX86_BUILTIN_AESENC128,
27811 IX86_BUILTIN_AESENCLAST128,
27812 IX86_BUILTIN_AESDEC128,
27813 IX86_BUILTIN_AESDECLAST128,
27814 IX86_BUILTIN_AESIMC128,
27815 IX86_BUILTIN_AESKEYGENASSIST128,
27817 /* PCLMUL instruction */
27818 IX86_BUILTIN_PCLMULQDQ128,
27820 /* AVX */
27821 IX86_BUILTIN_ADDPD256,
27822 IX86_BUILTIN_ADDPS256,
27823 IX86_BUILTIN_ADDSUBPD256,
27824 IX86_BUILTIN_ADDSUBPS256,
27825 IX86_BUILTIN_ANDPD256,
27826 IX86_BUILTIN_ANDPS256,
27827 IX86_BUILTIN_ANDNPD256,
27828 IX86_BUILTIN_ANDNPS256,
27829 IX86_BUILTIN_BLENDPD256,
27830 IX86_BUILTIN_BLENDPS256,
27831 IX86_BUILTIN_BLENDVPD256,
27832 IX86_BUILTIN_BLENDVPS256,
27833 IX86_BUILTIN_DIVPD256,
27834 IX86_BUILTIN_DIVPS256,
27835 IX86_BUILTIN_DPPS256,
27836 IX86_BUILTIN_HADDPD256,
27837 IX86_BUILTIN_HADDPS256,
27838 IX86_BUILTIN_HSUBPD256,
27839 IX86_BUILTIN_HSUBPS256,
27840 IX86_BUILTIN_MAXPD256,
27841 IX86_BUILTIN_MAXPS256,
27842 IX86_BUILTIN_MINPD256,
27843 IX86_BUILTIN_MINPS256,
27844 IX86_BUILTIN_MULPD256,
27845 IX86_BUILTIN_MULPS256,
27846 IX86_BUILTIN_ORPD256,
27847 IX86_BUILTIN_ORPS256,
27848 IX86_BUILTIN_SHUFPD256,
27849 IX86_BUILTIN_SHUFPS256,
27850 IX86_BUILTIN_SUBPD256,
27851 IX86_BUILTIN_SUBPS256,
27852 IX86_BUILTIN_XORPD256,
27853 IX86_BUILTIN_XORPS256,
27854 IX86_BUILTIN_CMPSD,
27855 IX86_BUILTIN_CMPSS,
27856 IX86_BUILTIN_CMPPD,
27857 IX86_BUILTIN_CMPPS,
27858 IX86_BUILTIN_CMPPD256,
27859 IX86_BUILTIN_CMPPS256,
27860 IX86_BUILTIN_CVTDQ2PD256,
27861 IX86_BUILTIN_CVTDQ2PS256,
27862 IX86_BUILTIN_CVTPD2PS256,
27863 IX86_BUILTIN_CVTPS2DQ256,
27864 IX86_BUILTIN_CVTPS2PD256,
27865 IX86_BUILTIN_CVTTPD2DQ256,
27866 IX86_BUILTIN_CVTPD2DQ256,
27867 IX86_BUILTIN_CVTTPS2DQ256,
27868 IX86_BUILTIN_EXTRACTF128PD256,
27869 IX86_BUILTIN_EXTRACTF128PS256,
27870 IX86_BUILTIN_EXTRACTF128SI256,
27871 IX86_BUILTIN_VZEROALL,
27872 IX86_BUILTIN_VZEROUPPER,
27873 IX86_BUILTIN_VPERMILVARPD,
27874 IX86_BUILTIN_VPERMILVARPS,
27875 IX86_BUILTIN_VPERMILVARPD256,
27876 IX86_BUILTIN_VPERMILVARPS256,
27877 IX86_BUILTIN_VPERMILPD,
27878 IX86_BUILTIN_VPERMILPS,
27879 IX86_BUILTIN_VPERMILPD256,
27880 IX86_BUILTIN_VPERMILPS256,
27881 IX86_BUILTIN_VPERMIL2PD,
27882 IX86_BUILTIN_VPERMIL2PS,
27883 IX86_BUILTIN_VPERMIL2PD256,
27884 IX86_BUILTIN_VPERMIL2PS256,
27885 IX86_BUILTIN_VPERM2F128PD256,
27886 IX86_BUILTIN_VPERM2F128PS256,
27887 IX86_BUILTIN_VPERM2F128SI256,
27888 IX86_BUILTIN_VBROADCASTSS,
27889 IX86_BUILTIN_VBROADCASTSD256,
27890 IX86_BUILTIN_VBROADCASTSS256,
27891 IX86_BUILTIN_VBROADCASTPD256,
27892 IX86_BUILTIN_VBROADCASTPS256,
27893 IX86_BUILTIN_VINSERTF128PD256,
27894 IX86_BUILTIN_VINSERTF128PS256,
27895 IX86_BUILTIN_VINSERTF128SI256,
27896 IX86_BUILTIN_LOADUPD256,
27897 IX86_BUILTIN_LOADUPS256,
27898 IX86_BUILTIN_STOREUPD256,
27899 IX86_BUILTIN_STOREUPS256,
27900 IX86_BUILTIN_LDDQU256,
27901 IX86_BUILTIN_MOVNTDQ256,
27902 IX86_BUILTIN_MOVNTPD256,
27903 IX86_BUILTIN_MOVNTPS256,
27904 IX86_BUILTIN_LOADDQU256,
27905 IX86_BUILTIN_STOREDQU256,
27906 IX86_BUILTIN_MASKLOADPD,
27907 IX86_BUILTIN_MASKLOADPS,
27908 IX86_BUILTIN_MASKSTOREPD,
27909 IX86_BUILTIN_MASKSTOREPS,
27910 IX86_BUILTIN_MASKLOADPD256,
27911 IX86_BUILTIN_MASKLOADPS256,
27912 IX86_BUILTIN_MASKSTOREPD256,
27913 IX86_BUILTIN_MASKSTOREPS256,
27914 IX86_BUILTIN_MOVSHDUP256,
27915 IX86_BUILTIN_MOVSLDUP256,
27916 IX86_BUILTIN_MOVDDUP256,
27918 IX86_BUILTIN_SQRTPD256,
27919 IX86_BUILTIN_SQRTPS256,
27920 IX86_BUILTIN_SQRTPS_NR256,
27921 IX86_BUILTIN_RSQRTPS256,
27922 IX86_BUILTIN_RSQRTPS_NR256,
27924 IX86_BUILTIN_RCPPS256,
27926 IX86_BUILTIN_ROUNDPD256,
27927 IX86_BUILTIN_ROUNDPS256,
27929 IX86_BUILTIN_FLOORPD256,
27930 IX86_BUILTIN_CEILPD256,
27931 IX86_BUILTIN_TRUNCPD256,
27932 IX86_BUILTIN_RINTPD256,
27933 IX86_BUILTIN_ROUNDPD_AZ256,
27935 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27936 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27937 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27939 IX86_BUILTIN_FLOORPS256,
27940 IX86_BUILTIN_CEILPS256,
27941 IX86_BUILTIN_TRUNCPS256,
27942 IX86_BUILTIN_RINTPS256,
27943 IX86_BUILTIN_ROUNDPS_AZ256,
27945 IX86_BUILTIN_FLOORPS_SFIX256,
27946 IX86_BUILTIN_CEILPS_SFIX256,
27947 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27949 IX86_BUILTIN_UNPCKHPD256,
27950 IX86_BUILTIN_UNPCKLPD256,
27951 IX86_BUILTIN_UNPCKHPS256,
27952 IX86_BUILTIN_UNPCKLPS256,
27954 IX86_BUILTIN_SI256_SI,
27955 IX86_BUILTIN_PS256_PS,
27956 IX86_BUILTIN_PD256_PD,
27957 IX86_BUILTIN_SI_SI256,
27958 IX86_BUILTIN_PS_PS256,
27959 IX86_BUILTIN_PD_PD256,
27961 IX86_BUILTIN_VTESTZPD,
27962 IX86_BUILTIN_VTESTCPD,
27963 IX86_BUILTIN_VTESTNZCPD,
27964 IX86_BUILTIN_VTESTZPS,
27965 IX86_BUILTIN_VTESTCPS,
27966 IX86_BUILTIN_VTESTNZCPS,
27967 IX86_BUILTIN_VTESTZPD256,
27968 IX86_BUILTIN_VTESTCPD256,
27969 IX86_BUILTIN_VTESTNZCPD256,
27970 IX86_BUILTIN_VTESTZPS256,
27971 IX86_BUILTIN_VTESTCPS256,
27972 IX86_BUILTIN_VTESTNZCPS256,
27973 IX86_BUILTIN_PTESTZ256,
27974 IX86_BUILTIN_PTESTC256,
27975 IX86_BUILTIN_PTESTNZC256,
27977 IX86_BUILTIN_MOVMSKPD256,
27978 IX86_BUILTIN_MOVMSKPS256,
27980 /* AVX2 */
27981 IX86_BUILTIN_MPSADBW256,
27982 IX86_BUILTIN_PABSB256,
27983 IX86_BUILTIN_PABSW256,
27984 IX86_BUILTIN_PABSD256,
27985 IX86_BUILTIN_PACKSSDW256,
27986 IX86_BUILTIN_PACKSSWB256,
27987 IX86_BUILTIN_PACKUSDW256,
27988 IX86_BUILTIN_PACKUSWB256,
27989 IX86_BUILTIN_PADDB256,
27990 IX86_BUILTIN_PADDW256,
27991 IX86_BUILTIN_PADDD256,
27992 IX86_BUILTIN_PADDQ256,
27993 IX86_BUILTIN_PADDSB256,
27994 IX86_BUILTIN_PADDSW256,
27995 IX86_BUILTIN_PADDUSB256,
27996 IX86_BUILTIN_PADDUSW256,
27997 IX86_BUILTIN_PALIGNR256,
27998 IX86_BUILTIN_AND256I,
27999 IX86_BUILTIN_ANDNOT256I,
28000 IX86_BUILTIN_PAVGB256,
28001 IX86_BUILTIN_PAVGW256,
28002 IX86_BUILTIN_PBLENDVB256,
28003 IX86_BUILTIN_PBLENDVW256,
28004 IX86_BUILTIN_PCMPEQB256,
28005 IX86_BUILTIN_PCMPEQW256,
28006 IX86_BUILTIN_PCMPEQD256,
28007 IX86_BUILTIN_PCMPEQQ256,
28008 IX86_BUILTIN_PCMPGTB256,
28009 IX86_BUILTIN_PCMPGTW256,
28010 IX86_BUILTIN_PCMPGTD256,
28011 IX86_BUILTIN_PCMPGTQ256,
28012 IX86_BUILTIN_PHADDW256,
28013 IX86_BUILTIN_PHADDD256,
28014 IX86_BUILTIN_PHADDSW256,
28015 IX86_BUILTIN_PHSUBW256,
28016 IX86_BUILTIN_PHSUBD256,
28017 IX86_BUILTIN_PHSUBSW256,
28018 IX86_BUILTIN_PMADDUBSW256,
28019 IX86_BUILTIN_PMADDWD256,
28020 IX86_BUILTIN_PMAXSB256,
28021 IX86_BUILTIN_PMAXSW256,
28022 IX86_BUILTIN_PMAXSD256,
28023 IX86_BUILTIN_PMAXUB256,
28024 IX86_BUILTIN_PMAXUW256,
28025 IX86_BUILTIN_PMAXUD256,
28026 IX86_BUILTIN_PMINSB256,
28027 IX86_BUILTIN_PMINSW256,
28028 IX86_BUILTIN_PMINSD256,
28029 IX86_BUILTIN_PMINUB256,
28030 IX86_BUILTIN_PMINUW256,
28031 IX86_BUILTIN_PMINUD256,
28032 IX86_BUILTIN_PMOVMSKB256,
28033 IX86_BUILTIN_PMOVSXBW256,
28034 IX86_BUILTIN_PMOVSXBD256,
28035 IX86_BUILTIN_PMOVSXBQ256,
28036 IX86_BUILTIN_PMOVSXWD256,
28037 IX86_BUILTIN_PMOVSXWQ256,
28038 IX86_BUILTIN_PMOVSXDQ256,
28039 IX86_BUILTIN_PMOVZXBW256,
28040 IX86_BUILTIN_PMOVZXBD256,
28041 IX86_BUILTIN_PMOVZXBQ256,
28042 IX86_BUILTIN_PMOVZXWD256,
28043 IX86_BUILTIN_PMOVZXWQ256,
28044 IX86_BUILTIN_PMOVZXDQ256,
28045 IX86_BUILTIN_PMULDQ256,
28046 IX86_BUILTIN_PMULHRSW256,
28047 IX86_BUILTIN_PMULHUW256,
28048 IX86_BUILTIN_PMULHW256,
28049 IX86_BUILTIN_PMULLW256,
28050 IX86_BUILTIN_PMULLD256,
28051 IX86_BUILTIN_PMULUDQ256,
28052 IX86_BUILTIN_POR256,
28053 IX86_BUILTIN_PSADBW256,
28054 IX86_BUILTIN_PSHUFB256,
28055 IX86_BUILTIN_PSHUFD256,
28056 IX86_BUILTIN_PSHUFHW256,
28057 IX86_BUILTIN_PSHUFLW256,
28058 IX86_BUILTIN_PSIGNB256,
28059 IX86_BUILTIN_PSIGNW256,
28060 IX86_BUILTIN_PSIGND256,
28061 IX86_BUILTIN_PSLLDQI256,
28062 IX86_BUILTIN_PSLLWI256,
28063 IX86_BUILTIN_PSLLW256,
28064 IX86_BUILTIN_PSLLDI256,
28065 IX86_BUILTIN_PSLLD256,
28066 IX86_BUILTIN_PSLLQI256,
28067 IX86_BUILTIN_PSLLQ256,
28068 IX86_BUILTIN_PSRAWI256,
28069 IX86_BUILTIN_PSRAW256,
28070 IX86_BUILTIN_PSRADI256,
28071 IX86_BUILTIN_PSRAD256,
28072 IX86_BUILTIN_PSRLDQI256,
28073 IX86_BUILTIN_PSRLWI256,
28074 IX86_BUILTIN_PSRLW256,
28075 IX86_BUILTIN_PSRLDI256,
28076 IX86_BUILTIN_PSRLD256,
28077 IX86_BUILTIN_PSRLQI256,
28078 IX86_BUILTIN_PSRLQ256,
28079 IX86_BUILTIN_PSUBB256,
28080 IX86_BUILTIN_PSUBW256,
28081 IX86_BUILTIN_PSUBD256,
28082 IX86_BUILTIN_PSUBQ256,
28083 IX86_BUILTIN_PSUBSB256,
28084 IX86_BUILTIN_PSUBSW256,
28085 IX86_BUILTIN_PSUBUSB256,
28086 IX86_BUILTIN_PSUBUSW256,
28087 IX86_BUILTIN_PUNPCKHBW256,
28088 IX86_BUILTIN_PUNPCKHWD256,
28089 IX86_BUILTIN_PUNPCKHDQ256,
28090 IX86_BUILTIN_PUNPCKHQDQ256,
28091 IX86_BUILTIN_PUNPCKLBW256,
28092 IX86_BUILTIN_PUNPCKLWD256,
28093 IX86_BUILTIN_PUNPCKLDQ256,
28094 IX86_BUILTIN_PUNPCKLQDQ256,
28095 IX86_BUILTIN_PXOR256,
28096 IX86_BUILTIN_MOVNTDQA256,
28097 IX86_BUILTIN_VBROADCASTSS_PS,
28098 IX86_BUILTIN_VBROADCASTSS_PS256,
28099 IX86_BUILTIN_VBROADCASTSD_PD256,
28100 IX86_BUILTIN_VBROADCASTSI256,
28101 IX86_BUILTIN_PBLENDD256,
28102 IX86_BUILTIN_PBLENDD128,
28103 IX86_BUILTIN_PBROADCASTB256,
28104 IX86_BUILTIN_PBROADCASTW256,
28105 IX86_BUILTIN_PBROADCASTD256,
28106 IX86_BUILTIN_PBROADCASTQ256,
28107 IX86_BUILTIN_PBROADCASTB128,
28108 IX86_BUILTIN_PBROADCASTW128,
28109 IX86_BUILTIN_PBROADCASTD128,
28110 IX86_BUILTIN_PBROADCASTQ128,
28111 IX86_BUILTIN_VPERMVARSI256,
28112 IX86_BUILTIN_VPERMDF256,
28113 IX86_BUILTIN_VPERMVARSF256,
28114 IX86_BUILTIN_VPERMDI256,
28115 IX86_BUILTIN_VPERMTI256,
28116 IX86_BUILTIN_VEXTRACT128I256,
28117 IX86_BUILTIN_VINSERT128I256,
28118 IX86_BUILTIN_MASKLOADD,
28119 IX86_BUILTIN_MASKLOADQ,
28120 IX86_BUILTIN_MASKLOADD256,
28121 IX86_BUILTIN_MASKLOADQ256,
28122 IX86_BUILTIN_MASKSTORED,
28123 IX86_BUILTIN_MASKSTOREQ,
28124 IX86_BUILTIN_MASKSTORED256,
28125 IX86_BUILTIN_MASKSTOREQ256,
28126 IX86_BUILTIN_PSLLVV4DI,
28127 IX86_BUILTIN_PSLLVV2DI,
28128 IX86_BUILTIN_PSLLVV8SI,
28129 IX86_BUILTIN_PSLLVV4SI,
28130 IX86_BUILTIN_PSRAVV8SI,
28131 IX86_BUILTIN_PSRAVV4SI,
28132 IX86_BUILTIN_PSRLVV4DI,
28133 IX86_BUILTIN_PSRLVV2DI,
28134 IX86_BUILTIN_PSRLVV8SI,
28135 IX86_BUILTIN_PSRLVV4SI,
28137 IX86_BUILTIN_GATHERSIV2DF,
28138 IX86_BUILTIN_GATHERSIV4DF,
28139 IX86_BUILTIN_GATHERDIV2DF,
28140 IX86_BUILTIN_GATHERDIV4DF,
28141 IX86_BUILTIN_GATHERSIV4SF,
28142 IX86_BUILTIN_GATHERSIV8SF,
28143 IX86_BUILTIN_GATHERDIV4SF,
28144 IX86_BUILTIN_GATHERDIV8SF,
28145 IX86_BUILTIN_GATHERSIV2DI,
28146 IX86_BUILTIN_GATHERSIV4DI,
28147 IX86_BUILTIN_GATHERDIV2DI,
28148 IX86_BUILTIN_GATHERDIV4DI,
28149 IX86_BUILTIN_GATHERSIV4SI,
28150 IX86_BUILTIN_GATHERSIV8SI,
28151 IX86_BUILTIN_GATHERDIV4SI,
28152 IX86_BUILTIN_GATHERDIV8SI,
28154 /* AVX512F */
28155 IX86_BUILTIN_ADDPD512,
28156 IX86_BUILTIN_ADDPS512,
28157 IX86_BUILTIN_ADDSD_ROUND,
28158 IX86_BUILTIN_ADDSS_ROUND,
28159 IX86_BUILTIN_ALIGND512,
28160 IX86_BUILTIN_ALIGNQ512,
28161 IX86_BUILTIN_BLENDMD512,
28162 IX86_BUILTIN_BLENDMPD512,
28163 IX86_BUILTIN_BLENDMPS512,
28164 IX86_BUILTIN_BLENDMQ512,
28165 IX86_BUILTIN_BROADCASTF32X4_512,
28166 IX86_BUILTIN_BROADCASTF64X4_512,
28167 IX86_BUILTIN_BROADCASTI32X4_512,
28168 IX86_BUILTIN_BROADCASTI64X4_512,
28169 IX86_BUILTIN_BROADCASTSD512,
28170 IX86_BUILTIN_BROADCASTSS512,
28171 IX86_BUILTIN_CMPD512,
28172 IX86_BUILTIN_CMPPD512,
28173 IX86_BUILTIN_CMPPS512,
28174 IX86_BUILTIN_CMPQ512,
28175 IX86_BUILTIN_CMPSD_MASK,
28176 IX86_BUILTIN_CMPSS_MASK,
28177 IX86_BUILTIN_COMIDF,
28178 IX86_BUILTIN_COMISF,
28179 IX86_BUILTIN_COMPRESSPD512,
28180 IX86_BUILTIN_COMPRESSPDSTORE512,
28181 IX86_BUILTIN_COMPRESSPS512,
28182 IX86_BUILTIN_COMPRESSPSSTORE512,
28183 IX86_BUILTIN_CVTDQ2PD512,
28184 IX86_BUILTIN_CVTDQ2PS512,
28185 IX86_BUILTIN_CVTPD2DQ512,
28186 IX86_BUILTIN_CVTPD2PS512,
28187 IX86_BUILTIN_CVTPD2UDQ512,
28188 IX86_BUILTIN_CVTPH2PS512,
28189 IX86_BUILTIN_CVTPS2DQ512,
28190 IX86_BUILTIN_CVTPS2PD512,
28191 IX86_BUILTIN_CVTPS2PH512,
28192 IX86_BUILTIN_CVTPS2UDQ512,
28193 IX86_BUILTIN_CVTSD2SS_ROUND,
28194 IX86_BUILTIN_CVTSI2SD64,
28195 IX86_BUILTIN_CVTSI2SS32,
28196 IX86_BUILTIN_CVTSI2SS64,
28197 IX86_BUILTIN_CVTSS2SD_ROUND,
28198 IX86_BUILTIN_CVTTPD2DQ512,
28199 IX86_BUILTIN_CVTTPD2UDQ512,
28200 IX86_BUILTIN_CVTTPS2DQ512,
28201 IX86_BUILTIN_CVTTPS2UDQ512,
28202 IX86_BUILTIN_CVTUDQ2PD512,
28203 IX86_BUILTIN_CVTUDQ2PS512,
28204 IX86_BUILTIN_CVTUSI2SD32,
28205 IX86_BUILTIN_CVTUSI2SD64,
28206 IX86_BUILTIN_CVTUSI2SS32,
28207 IX86_BUILTIN_CVTUSI2SS64,
28208 IX86_BUILTIN_DIVPD512,
28209 IX86_BUILTIN_DIVPS512,
28210 IX86_BUILTIN_DIVSD_ROUND,
28211 IX86_BUILTIN_DIVSS_ROUND,
28212 IX86_BUILTIN_EXPANDPD512,
28213 IX86_BUILTIN_EXPANDPD512Z,
28214 IX86_BUILTIN_EXPANDPDLOAD512,
28215 IX86_BUILTIN_EXPANDPDLOAD512Z,
28216 IX86_BUILTIN_EXPANDPS512,
28217 IX86_BUILTIN_EXPANDPS512Z,
28218 IX86_BUILTIN_EXPANDPSLOAD512,
28219 IX86_BUILTIN_EXPANDPSLOAD512Z,
28220 IX86_BUILTIN_EXTRACTF32X4,
28221 IX86_BUILTIN_EXTRACTF64X4,
28222 IX86_BUILTIN_EXTRACTI32X4,
28223 IX86_BUILTIN_EXTRACTI64X4,
28224 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28225 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28226 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28227 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28228 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28229 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28230 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28231 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28232 IX86_BUILTIN_GETEXPPD512,
28233 IX86_BUILTIN_GETEXPPS512,
28234 IX86_BUILTIN_GETEXPSD128,
28235 IX86_BUILTIN_GETEXPSS128,
28236 IX86_BUILTIN_GETMANTPD512,
28237 IX86_BUILTIN_GETMANTPS512,
28238 IX86_BUILTIN_GETMANTSD128,
28239 IX86_BUILTIN_GETMANTSS128,
28240 IX86_BUILTIN_INSERTF32X4,
28241 IX86_BUILTIN_INSERTF64X4,
28242 IX86_BUILTIN_INSERTI32X4,
28243 IX86_BUILTIN_INSERTI64X4,
28244 IX86_BUILTIN_LOADAPD512,
28245 IX86_BUILTIN_LOADAPS512,
28246 IX86_BUILTIN_LOADDQUDI512,
28247 IX86_BUILTIN_LOADDQUSI512,
28248 IX86_BUILTIN_LOADUPD512,
28249 IX86_BUILTIN_LOADUPS512,
28250 IX86_BUILTIN_MAXPD512,
28251 IX86_BUILTIN_MAXPS512,
28252 IX86_BUILTIN_MAXSD_ROUND,
28253 IX86_BUILTIN_MAXSS_ROUND,
28254 IX86_BUILTIN_MINPD512,
28255 IX86_BUILTIN_MINPS512,
28256 IX86_BUILTIN_MINSD_ROUND,
28257 IX86_BUILTIN_MINSS_ROUND,
28258 IX86_BUILTIN_MOVAPD512,
28259 IX86_BUILTIN_MOVAPS512,
28260 IX86_BUILTIN_MOVDDUP512,
28261 IX86_BUILTIN_MOVDQA32LOAD512,
28262 IX86_BUILTIN_MOVDQA32STORE512,
28263 IX86_BUILTIN_MOVDQA32_512,
28264 IX86_BUILTIN_MOVDQA64LOAD512,
28265 IX86_BUILTIN_MOVDQA64STORE512,
28266 IX86_BUILTIN_MOVDQA64_512,
28267 IX86_BUILTIN_MOVNTDQ512,
28268 IX86_BUILTIN_MOVNTDQA512,
28269 IX86_BUILTIN_MOVNTPD512,
28270 IX86_BUILTIN_MOVNTPS512,
28271 IX86_BUILTIN_MOVSHDUP512,
28272 IX86_BUILTIN_MOVSLDUP512,
28273 IX86_BUILTIN_MULPD512,
28274 IX86_BUILTIN_MULPS512,
28275 IX86_BUILTIN_MULSD_ROUND,
28276 IX86_BUILTIN_MULSS_ROUND,
28277 IX86_BUILTIN_PABSD512,
28278 IX86_BUILTIN_PABSQ512,
28279 IX86_BUILTIN_PADDD512,
28280 IX86_BUILTIN_PADDQ512,
28281 IX86_BUILTIN_PANDD512,
28282 IX86_BUILTIN_PANDND512,
28283 IX86_BUILTIN_PANDNQ512,
28284 IX86_BUILTIN_PANDQ512,
28285 IX86_BUILTIN_PBROADCASTD512,
28286 IX86_BUILTIN_PBROADCASTD512_GPR,
28287 IX86_BUILTIN_PBROADCASTMB512,
28288 IX86_BUILTIN_PBROADCASTMW512,
28289 IX86_BUILTIN_PBROADCASTQ512,
28290 IX86_BUILTIN_PBROADCASTQ512_GPR,
28291 IX86_BUILTIN_PBROADCASTQ512_MEM,
28292 IX86_BUILTIN_PCMPEQD512_MASK,
28293 IX86_BUILTIN_PCMPEQQ512_MASK,
28294 IX86_BUILTIN_PCMPGTD512_MASK,
28295 IX86_BUILTIN_PCMPGTQ512_MASK,
28296 IX86_BUILTIN_PCOMPRESSD512,
28297 IX86_BUILTIN_PCOMPRESSDSTORE512,
28298 IX86_BUILTIN_PCOMPRESSQ512,
28299 IX86_BUILTIN_PCOMPRESSQSTORE512,
28300 IX86_BUILTIN_PEXPANDD512,
28301 IX86_BUILTIN_PEXPANDD512Z,
28302 IX86_BUILTIN_PEXPANDDLOAD512,
28303 IX86_BUILTIN_PEXPANDDLOAD512Z,
28304 IX86_BUILTIN_PEXPANDQ512,
28305 IX86_BUILTIN_PEXPANDQ512Z,
28306 IX86_BUILTIN_PEXPANDQLOAD512,
28307 IX86_BUILTIN_PEXPANDQLOAD512Z,
28308 IX86_BUILTIN_PMAXSD512,
28309 IX86_BUILTIN_PMAXSQ512,
28310 IX86_BUILTIN_PMAXUD512,
28311 IX86_BUILTIN_PMAXUQ512,
28312 IX86_BUILTIN_PMINSD512,
28313 IX86_BUILTIN_PMINSQ512,
28314 IX86_BUILTIN_PMINUD512,
28315 IX86_BUILTIN_PMINUQ512,
28316 IX86_BUILTIN_PMOVDB512,
28317 IX86_BUILTIN_PMOVDB512_MEM,
28318 IX86_BUILTIN_PMOVDW512,
28319 IX86_BUILTIN_PMOVDW512_MEM,
28320 IX86_BUILTIN_PMOVQB512,
28321 IX86_BUILTIN_PMOVQB512_MEM,
28322 IX86_BUILTIN_PMOVQD512,
28323 IX86_BUILTIN_PMOVQD512_MEM,
28324 IX86_BUILTIN_PMOVQW512,
28325 IX86_BUILTIN_PMOVQW512_MEM,
28326 IX86_BUILTIN_PMOVSDB512,
28327 IX86_BUILTIN_PMOVSDB512_MEM,
28328 IX86_BUILTIN_PMOVSDW512,
28329 IX86_BUILTIN_PMOVSDW512_MEM,
28330 IX86_BUILTIN_PMOVSQB512,
28331 IX86_BUILTIN_PMOVSQB512_MEM,
28332 IX86_BUILTIN_PMOVSQD512,
28333 IX86_BUILTIN_PMOVSQD512_MEM,
28334 IX86_BUILTIN_PMOVSQW512,
28335 IX86_BUILTIN_PMOVSQW512_MEM,
28336 IX86_BUILTIN_PMOVSXBD512,
28337 IX86_BUILTIN_PMOVSXBQ512,
28338 IX86_BUILTIN_PMOVSXDQ512,
28339 IX86_BUILTIN_PMOVSXWD512,
28340 IX86_BUILTIN_PMOVSXWQ512,
28341 IX86_BUILTIN_PMOVUSDB512,
28342 IX86_BUILTIN_PMOVUSDB512_MEM,
28343 IX86_BUILTIN_PMOVUSDW512,
28344 IX86_BUILTIN_PMOVUSDW512_MEM,
28345 IX86_BUILTIN_PMOVUSQB512,
28346 IX86_BUILTIN_PMOVUSQB512_MEM,
28347 IX86_BUILTIN_PMOVUSQD512,
28348 IX86_BUILTIN_PMOVUSQD512_MEM,
28349 IX86_BUILTIN_PMOVUSQW512,
28350 IX86_BUILTIN_PMOVUSQW512_MEM,
28351 IX86_BUILTIN_PMOVZXBD512,
28352 IX86_BUILTIN_PMOVZXBQ512,
28353 IX86_BUILTIN_PMOVZXDQ512,
28354 IX86_BUILTIN_PMOVZXWD512,
28355 IX86_BUILTIN_PMOVZXWQ512,
28356 IX86_BUILTIN_PMULDQ512,
28357 IX86_BUILTIN_PMULLD512,
28358 IX86_BUILTIN_PMULUDQ512,
28359 IX86_BUILTIN_PORD512,
28360 IX86_BUILTIN_PORQ512,
28361 IX86_BUILTIN_PROLD512,
28362 IX86_BUILTIN_PROLQ512,
28363 IX86_BUILTIN_PROLVD512,
28364 IX86_BUILTIN_PROLVQ512,
28365 IX86_BUILTIN_PRORD512,
28366 IX86_BUILTIN_PRORQ512,
28367 IX86_BUILTIN_PRORVD512,
28368 IX86_BUILTIN_PRORVQ512,
28369 IX86_BUILTIN_PSHUFD512,
28370 IX86_BUILTIN_PSLLD512,
28371 IX86_BUILTIN_PSLLDI512,
28372 IX86_BUILTIN_PSLLQ512,
28373 IX86_BUILTIN_PSLLQI512,
28374 IX86_BUILTIN_PSLLVV16SI,
28375 IX86_BUILTIN_PSLLVV8DI,
28376 IX86_BUILTIN_PSRAD512,
28377 IX86_BUILTIN_PSRADI512,
28378 IX86_BUILTIN_PSRAQ512,
28379 IX86_BUILTIN_PSRAQI512,
28380 IX86_BUILTIN_PSRAVV16SI,
28381 IX86_BUILTIN_PSRAVV8DI,
28382 IX86_BUILTIN_PSRLD512,
28383 IX86_BUILTIN_PSRLDI512,
28384 IX86_BUILTIN_PSRLQ512,
28385 IX86_BUILTIN_PSRLQI512,
28386 IX86_BUILTIN_PSRLVV16SI,
28387 IX86_BUILTIN_PSRLVV8DI,
28388 IX86_BUILTIN_PSUBD512,
28389 IX86_BUILTIN_PSUBQ512,
28390 IX86_BUILTIN_PTESTMD512,
28391 IX86_BUILTIN_PTESTMQ512,
28392 IX86_BUILTIN_PTESTNMD512,
28393 IX86_BUILTIN_PTESTNMQ512,
28394 IX86_BUILTIN_PUNPCKHDQ512,
28395 IX86_BUILTIN_PUNPCKHQDQ512,
28396 IX86_BUILTIN_PUNPCKLDQ512,
28397 IX86_BUILTIN_PUNPCKLQDQ512,
28398 IX86_BUILTIN_PXORD512,
28399 IX86_BUILTIN_PXORQ512,
28400 IX86_BUILTIN_RCP14PD512,
28401 IX86_BUILTIN_RCP14PS512,
28402 IX86_BUILTIN_RCP14SD,
28403 IX86_BUILTIN_RCP14SS,
28404 IX86_BUILTIN_RNDSCALEPD,
28405 IX86_BUILTIN_RNDSCALEPS,
28406 IX86_BUILTIN_RNDSCALESD,
28407 IX86_BUILTIN_RNDSCALESS,
28408 IX86_BUILTIN_RSQRT14PD512,
28409 IX86_BUILTIN_RSQRT14PS512,
28410 IX86_BUILTIN_RSQRT14SD,
28411 IX86_BUILTIN_RSQRT14SS,
28412 IX86_BUILTIN_SCALEFPD512,
28413 IX86_BUILTIN_SCALEFPS512,
28414 IX86_BUILTIN_SCALEFSD,
28415 IX86_BUILTIN_SCALEFSS,
28416 IX86_BUILTIN_SHUFPD512,
28417 IX86_BUILTIN_SHUFPS512,
28418 IX86_BUILTIN_SHUF_F32x4,
28419 IX86_BUILTIN_SHUF_F64x2,
28420 IX86_BUILTIN_SHUF_I32x4,
28421 IX86_BUILTIN_SHUF_I64x2,
28422 IX86_BUILTIN_SQRTPD512,
28423 IX86_BUILTIN_SQRTPD512_MASK,
28424 IX86_BUILTIN_SQRTPS512_MASK,
28425 IX86_BUILTIN_SQRTPS_NR512,
28426 IX86_BUILTIN_SQRTSD_ROUND,
28427 IX86_BUILTIN_SQRTSS_ROUND,
28428 IX86_BUILTIN_STOREAPD512,
28429 IX86_BUILTIN_STOREAPS512,
28430 IX86_BUILTIN_STOREDQUDI512,
28431 IX86_BUILTIN_STOREDQUSI512,
28432 IX86_BUILTIN_STOREUPD512,
28433 IX86_BUILTIN_STOREUPS512,
28434 IX86_BUILTIN_SUBPD512,
28435 IX86_BUILTIN_SUBPS512,
28436 IX86_BUILTIN_SUBSD_ROUND,
28437 IX86_BUILTIN_SUBSS_ROUND,
28438 IX86_BUILTIN_UCMPD512,
28439 IX86_BUILTIN_UCMPQ512,
28440 IX86_BUILTIN_UNPCKHPD512,
28441 IX86_BUILTIN_UNPCKHPS512,
28442 IX86_BUILTIN_UNPCKLPD512,
28443 IX86_BUILTIN_UNPCKLPS512,
28444 IX86_BUILTIN_VCVTSD2SI32,
28445 IX86_BUILTIN_VCVTSD2SI64,
28446 IX86_BUILTIN_VCVTSD2USI32,
28447 IX86_BUILTIN_VCVTSD2USI64,
28448 IX86_BUILTIN_VCVTSS2SI32,
28449 IX86_BUILTIN_VCVTSS2SI64,
28450 IX86_BUILTIN_VCVTSS2USI32,
28451 IX86_BUILTIN_VCVTSS2USI64,
28452 IX86_BUILTIN_VCVTTSD2SI32,
28453 IX86_BUILTIN_VCVTTSD2SI64,
28454 IX86_BUILTIN_VCVTTSD2USI32,
28455 IX86_BUILTIN_VCVTTSD2USI64,
28456 IX86_BUILTIN_VCVTTSS2SI32,
28457 IX86_BUILTIN_VCVTTSS2SI64,
28458 IX86_BUILTIN_VCVTTSS2USI32,
28459 IX86_BUILTIN_VCVTTSS2USI64,
28460 IX86_BUILTIN_VFMADDPD512_MASK,
28461 IX86_BUILTIN_VFMADDPD512_MASK3,
28462 IX86_BUILTIN_VFMADDPD512_MASKZ,
28463 IX86_BUILTIN_VFMADDPS512_MASK,
28464 IX86_BUILTIN_VFMADDPS512_MASK3,
28465 IX86_BUILTIN_VFMADDPS512_MASKZ,
28466 IX86_BUILTIN_VFMADDSD3_ROUND,
28467 IX86_BUILTIN_VFMADDSS3_ROUND,
28468 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28469 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28470 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28471 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28472 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28473 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28474 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28475 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28476 IX86_BUILTIN_VFMSUBPD512_MASK3,
28477 IX86_BUILTIN_VFMSUBPS512_MASK3,
28478 IX86_BUILTIN_VFMSUBSD3_MASK3,
28479 IX86_BUILTIN_VFMSUBSS3_MASK3,
28480 IX86_BUILTIN_VFNMADDPD512_MASK,
28481 IX86_BUILTIN_VFNMADDPS512_MASK,
28482 IX86_BUILTIN_VFNMSUBPD512_MASK,
28483 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28484 IX86_BUILTIN_VFNMSUBPS512_MASK,
28485 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28486 IX86_BUILTIN_VPCLZCNTD512,
28487 IX86_BUILTIN_VPCLZCNTQ512,
28488 IX86_BUILTIN_VPCONFLICTD512,
28489 IX86_BUILTIN_VPCONFLICTQ512,
28490 IX86_BUILTIN_VPERMDF512,
28491 IX86_BUILTIN_VPERMDI512,
28492 IX86_BUILTIN_VPERMI2VARD512,
28493 IX86_BUILTIN_VPERMI2VARPD512,
28494 IX86_BUILTIN_VPERMI2VARPS512,
28495 IX86_BUILTIN_VPERMI2VARQ512,
28496 IX86_BUILTIN_VPERMILPD512,
28497 IX86_BUILTIN_VPERMILPS512,
28498 IX86_BUILTIN_VPERMILVARPD512,
28499 IX86_BUILTIN_VPERMILVARPS512,
28500 IX86_BUILTIN_VPERMT2VARD512,
28501 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28502 IX86_BUILTIN_VPERMT2VARPD512,
28503 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28504 IX86_BUILTIN_VPERMT2VARPS512,
28505 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28506 IX86_BUILTIN_VPERMT2VARQ512,
28507 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28508 IX86_BUILTIN_VPERMVARDF512,
28509 IX86_BUILTIN_VPERMVARDI512,
28510 IX86_BUILTIN_VPERMVARSF512,
28511 IX86_BUILTIN_VPERMVARSI512,
28512 IX86_BUILTIN_VTERNLOGD512_MASK,
28513 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28514 IX86_BUILTIN_VTERNLOGQ512_MASK,
28515 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28517 /* Mask arithmetic operations */
28518 IX86_BUILTIN_KAND16,
28519 IX86_BUILTIN_KANDN16,
28520 IX86_BUILTIN_KNOT16,
28521 IX86_BUILTIN_KOR16,
28522 IX86_BUILTIN_KORTESTC16,
28523 IX86_BUILTIN_KORTESTZ16,
28524 IX86_BUILTIN_KUNPCKBW,
28525 IX86_BUILTIN_KXNOR16,
28526 IX86_BUILTIN_KXOR16,
28527 IX86_BUILTIN_KMOV16,
28529 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28530 where all operands are 32-byte or 64-byte wide respectively. */
28531 IX86_BUILTIN_GATHERALTSIV4DF,
28532 IX86_BUILTIN_GATHERALTDIV8SF,
28533 IX86_BUILTIN_GATHERALTSIV4DI,
28534 IX86_BUILTIN_GATHERALTDIV8SI,
28535 IX86_BUILTIN_GATHER3ALTDIV16SF,
28536 IX86_BUILTIN_GATHER3ALTDIV16SI,
28537 IX86_BUILTIN_GATHER3ALTSIV8DF,
28538 IX86_BUILTIN_GATHER3ALTSIV8DI,
28539 IX86_BUILTIN_GATHER3DIV16SF,
28540 IX86_BUILTIN_GATHER3DIV16SI,
28541 IX86_BUILTIN_GATHER3DIV8DF,
28542 IX86_BUILTIN_GATHER3DIV8DI,
28543 IX86_BUILTIN_GATHER3SIV16SF,
28544 IX86_BUILTIN_GATHER3SIV16SI,
28545 IX86_BUILTIN_GATHER3SIV8DF,
28546 IX86_BUILTIN_GATHER3SIV8DI,
28547 IX86_BUILTIN_SCATTERDIV16SF,
28548 IX86_BUILTIN_SCATTERDIV16SI,
28549 IX86_BUILTIN_SCATTERDIV8DF,
28550 IX86_BUILTIN_SCATTERDIV8DI,
28551 IX86_BUILTIN_SCATTERSIV16SF,
28552 IX86_BUILTIN_SCATTERSIV16SI,
28553 IX86_BUILTIN_SCATTERSIV8DF,
28554 IX86_BUILTIN_SCATTERSIV8DI,
28556 /* AVX512PF */
28557 IX86_BUILTIN_GATHERPFQPD,
28558 IX86_BUILTIN_GATHERPFDPS,
28559 IX86_BUILTIN_GATHERPFDPD,
28560 IX86_BUILTIN_GATHERPFQPS,
28561 IX86_BUILTIN_SCATTERPFDPD,
28562 IX86_BUILTIN_SCATTERPFDPS,
28563 IX86_BUILTIN_SCATTERPFQPD,
28564 IX86_BUILTIN_SCATTERPFQPS,
28566 /* AVX512ER */
28567 IX86_BUILTIN_EXP2PD_MASK,
28568 IX86_BUILTIN_EXP2PS_MASK,
28569 IX86_BUILTIN_EXP2PS,
28570 IX86_BUILTIN_RCP28PD,
28571 IX86_BUILTIN_RCP28PS,
28572 IX86_BUILTIN_RCP28SD,
28573 IX86_BUILTIN_RCP28SS,
28574 IX86_BUILTIN_RSQRT28PD,
28575 IX86_BUILTIN_RSQRT28PS,
28576 IX86_BUILTIN_RSQRT28SD,
28577 IX86_BUILTIN_RSQRT28SS,
28579 /* SHA builtins. */
28580 IX86_BUILTIN_SHA1MSG1,
28581 IX86_BUILTIN_SHA1MSG2,
28582 IX86_BUILTIN_SHA1NEXTE,
28583 IX86_BUILTIN_SHA1RNDS4,
28584 IX86_BUILTIN_SHA256MSG1,
28585 IX86_BUILTIN_SHA256MSG2,
28586 IX86_BUILTIN_SHA256RNDS2,
28588 /* TFmode support builtins. */
28589 IX86_BUILTIN_INFQ,
28590 IX86_BUILTIN_HUGE_VALQ,
28591 IX86_BUILTIN_FABSQ,
28592 IX86_BUILTIN_COPYSIGNQ,
28594 /* Vectorizer support builtins. */
28595 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28596 IX86_BUILTIN_CPYSGNPS,
28597 IX86_BUILTIN_CPYSGNPD,
28598 IX86_BUILTIN_CPYSGNPS256,
28599 IX86_BUILTIN_CPYSGNPS512,
28600 IX86_BUILTIN_CPYSGNPD256,
28601 IX86_BUILTIN_CPYSGNPD512,
28602 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28603 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28606 /* FMA4 instructions. */
28607 IX86_BUILTIN_VFMADDSS,
28608 IX86_BUILTIN_VFMADDSD,
28609 IX86_BUILTIN_VFMADDPS,
28610 IX86_BUILTIN_VFMADDPD,
28611 IX86_BUILTIN_VFMADDPS256,
28612 IX86_BUILTIN_VFMADDPD256,
28613 IX86_BUILTIN_VFMADDSUBPS,
28614 IX86_BUILTIN_VFMADDSUBPD,
28615 IX86_BUILTIN_VFMADDSUBPS256,
28616 IX86_BUILTIN_VFMADDSUBPD256,
28618 /* FMA3 instructions. */
28619 IX86_BUILTIN_VFMADDSS3,
28620 IX86_BUILTIN_VFMADDSD3,
28622 /* XOP instructions. */
28623 IX86_BUILTIN_VPCMOV,
28624 IX86_BUILTIN_VPCMOV_V2DI,
28625 IX86_BUILTIN_VPCMOV_V4SI,
28626 IX86_BUILTIN_VPCMOV_V8HI,
28627 IX86_BUILTIN_VPCMOV_V16QI,
28628 IX86_BUILTIN_VPCMOV_V4SF,
28629 IX86_BUILTIN_VPCMOV_V2DF,
28630 IX86_BUILTIN_VPCMOV256,
28631 IX86_BUILTIN_VPCMOV_V4DI256,
28632 IX86_BUILTIN_VPCMOV_V8SI256,
28633 IX86_BUILTIN_VPCMOV_V16HI256,
28634 IX86_BUILTIN_VPCMOV_V32QI256,
28635 IX86_BUILTIN_VPCMOV_V8SF256,
28636 IX86_BUILTIN_VPCMOV_V4DF256,
28638 IX86_BUILTIN_VPPERM,
28640 IX86_BUILTIN_VPMACSSWW,
28641 IX86_BUILTIN_VPMACSWW,
28642 IX86_BUILTIN_VPMACSSWD,
28643 IX86_BUILTIN_VPMACSWD,
28644 IX86_BUILTIN_VPMACSSDD,
28645 IX86_BUILTIN_VPMACSDD,
28646 IX86_BUILTIN_VPMACSSDQL,
28647 IX86_BUILTIN_VPMACSSDQH,
28648 IX86_BUILTIN_VPMACSDQL,
28649 IX86_BUILTIN_VPMACSDQH,
28650 IX86_BUILTIN_VPMADCSSWD,
28651 IX86_BUILTIN_VPMADCSWD,
28653 IX86_BUILTIN_VPHADDBW,
28654 IX86_BUILTIN_VPHADDBD,
28655 IX86_BUILTIN_VPHADDBQ,
28656 IX86_BUILTIN_VPHADDWD,
28657 IX86_BUILTIN_VPHADDWQ,
28658 IX86_BUILTIN_VPHADDDQ,
28659 IX86_BUILTIN_VPHADDUBW,
28660 IX86_BUILTIN_VPHADDUBD,
28661 IX86_BUILTIN_VPHADDUBQ,
28662 IX86_BUILTIN_VPHADDUWD,
28663 IX86_BUILTIN_VPHADDUWQ,
28664 IX86_BUILTIN_VPHADDUDQ,
28665 IX86_BUILTIN_VPHSUBBW,
28666 IX86_BUILTIN_VPHSUBWD,
28667 IX86_BUILTIN_VPHSUBDQ,
28669 IX86_BUILTIN_VPROTB,
28670 IX86_BUILTIN_VPROTW,
28671 IX86_BUILTIN_VPROTD,
28672 IX86_BUILTIN_VPROTQ,
28673 IX86_BUILTIN_VPROTB_IMM,
28674 IX86_BUILTIN_VPROTW_IMM,
28675 IX86_BUILTIN_VPROTD_IMM,
28676 IX86_BUILTIN_VPROTQ_IMM,
28678 IX86_BUILTIN_VPSHLB,
28679 IX86_BUILTIN_VPSHLW,
28680 IX86_BUILTIN_VPSHLD,
28681 IX86_BUILTIN_VPSHLQ,
28682 IX86_BUILTIN_VPSHAB,
28683 IX86_BUILTIN_VPSHAW,
28684 IX86_BUILTIN_VPSHAD,
28685 IX86_BUILTIN_VPSHAQ,
28687 IX86_BUILTIN_VFRCZSS,
28688 IX86_BUILTIN_VFRCZSD,
28689 IX86_BUILTIN_VFRCZPS,
28690 IX86_BUILTIN_VFRCZPD,
28691 IX86_BUILTIN_VFRCZPS256,
28692 IX86_BUILTIN_VFRCZPD256,
28694 IX86_BUILTIN_VPCOMEQUB,
28695 IX86_BUILTIN_VPCOMNEUB,
28696 IX86_BUILTIN_VPCOMLTUB,
28697 IX86_BUILTIN_VPCOMLEUB,
28698 IX86_BUILTIN_VPCOMGTUB,
28699 IX86_BUILTIN_VPCOMGEUB,
28700 IX86_BUILTIN_VPCOMFALSEUB,
28701 IX86_BUILTIN_VPCOMTRUEUB,
28703 IX86_BUILTIN_VPCOMEQUW,
28704 IX86_BUILTIN_VPCOMNEUW,
28705 IX86_BUILTIN_VPCOMLTUW,
28706 IX86_BUILTIN_VPCOMLEUW,
28707 IX86_BUILTIN_VPCOMGTUW,
28708 IX86_BUILTIN_VPCOMGEUW,
28709 IX86_BUILTIN_VPCOMFALSEUW,
28710 IX86_BUILTIN_VPCOMTRUEUW,
28712 IX86_BUILTIN_VPCOMEQUD,
28713 IX86_BUILTIN_VPCOMNEUD,
28714 IX86_BUILTIN_VPCOMLTUD,
28715 IX86_BUILTIN_VPCOMLEUD,
28716 IX86_BUILTIN_VPCOMGTUD,
28717 IX86_BUILTIN_VPCOMGEUD,
28718 IX86_BUILTIN_VPCOMFALSEUD,
28719 IX86_BUILTIN_VPCOMTRUEUD,
28721 IX86_BUILTIN_VPCOMEQUQ,
28722 IX86_BUILTIN_VPCOMNEUQ,
28723 IX86_BUILTIN_VPCOMLTUQ,
28724 IX86_BUILTIN_VPCOMLEUQ,
28725 IX86_BUILTIN_VPCOMGTUQ,
28726 IX86_BUILTIN_VPCOMGEUQ,
28727 IX86_BUILTIN_VPCOMFALSEUQ,
28728 IX86_BUILTIN_VPCOMTRUEUQ,
28730 IX86_BUILTIN_VPCOMEQB,
28731 IX86_BUILTIN_VPCOMNEB,
28732 IX86_BUILTIN_VPCOMLTB,
28733 IX86_BUILTIN_VPCOMLEB,
28734 IX86_BUILTIN_VPCOMGTB,
28735 IX86_BUILTIN_VPCOMGEB,
28736 IX86_BUILTIN_VPCOMFALSEB,
28737 IX86_BUILTIN_VPCOMTRUEB,
28739 IX86_BUILTIN_VPCOMEQW,
28740 IX86_BUILTIN_VPCOMNEW,
28741 IX86_BUILTIN_VPCOMLTW,
28742 IX86_BUILTIN_VPCOMLEW,
28743 IX86_BUILTIN_VPCOMGTW,
28744 IX86_BUILTIN_VPCOMGEW,
28745 IX86_BUILTIN_VPCOMFALSEW,
28746 IX86_BUILTIN_VPCOMTRUEW,
28748 IX86_BUILTIN_VPCOMEQD,
28749 IX86_BUILTIN_VPCOMNED,
28750 IX86_BUILTIN_VPCOMLTD,
28751 IX86_BUILTIN_VPCOMLED,
28752 IX86_BUILTIN_VPCOMGTD,
28753 IX86_BUILTIN_VPCOMGED,
28754 IX86_BUILTIN_VPCOMFALSED,
28755 IX86_BUILTIN_VPCOMTRUED,
28757 IX86_BUILTIN_VPCOMEQQ,
28758 IX86_BUILTIN_VPCOMNEQ,
28759 IX86_BUILTIN_VPCOMLTQ,
28760 IX86_BUILTIN_VPCOMLEQ,
28761 IX86_BUILTIN_VPCOMGTQ,
28762 IX86_BUILTIN_VPCOMGEQ,
28763 IX86_BUILTIN_VPCOMFALSEQ,
28764 IX86_BUILTIN_VPCOMTRUEQ,
28766 /* LWP instructions. */
28767 IX86_BUILTIN_LLWPCB,
28768 IX86_BUILTIN_SLWPCB,
28769 IX86_BUILTIN_LWPVAL32,
28770 IX86_BUILTIN_LWPVAL64,
28771 IX86_BUILTIN_LWPINS32,
28772 IX86_BUILTIN_LWPINS64,
28774 IX86_BUILTIN_CLZS,
28776 /* RTM */
28777 IX86_BUILTIN_XBEGIN,
28778 IX86_BUILTIN_XEND,
28779 IX86_BUILTIN_XABORT,
28780 IX86_BUILTIN_XTEST,
28782 /* BMI instructions. */
28783 IX86_BUILTIN_BEXTR32,
28784 IX86_BUILTIN_BEXTR64,
28785 IX86_BUILTIN_CTZS,
28787 /* TBM instructions. */
28788 IX86_BUILTIN_BEXTRI32,
28789 IX86_BUILTIN_BEXTRI64,
28791 /* BMI2 instructions. */
28792 IX86_BUILTIN_BZHI32,
28793 IX86_BUILTIN_BZHI64,
28794 IX86_BUILTIN_PDEP32,
28795 IX86_BUILTIN_PDEP64,
28796 IX86_BUILTIN_PEXT32,
28797 IX86_BUILTIN_PEXT64,
28799 /* ADX instructions. */
28800 IX86_BUILTIN_ADDCARRYX32,
28801 IX86_BUILTIN_ADDCARRYX64,
28803 /* FSGSBASE instructions. */
28804 IX86_BUILTIN_RDFSBASE32,
28805 IX86_BUILTIN_RDFSBASE64,
28806 IX86_BUILTIN_RDGSBASE32,
28807 IX86_BUILTIN_RDGSBASE64,
28808 IX86_BUILTIN_WRFSBASE32,
28809 IX86_BUILTIN_WRFSBASE64,
28810 IX86_BUILTIN_WRGSBASE32,
28811 IX86_BUILTIN_WRGSBASE64,
28813 /* RDRND instructions. */
28814 IX86_BUILTIN_RDRAND16_STEP,
28815 IX86_BUILTIN_RDRAND32_STEP,
28816 IX86_BUILTIN_RDRAND64_STEP,
28818 /* RDSEED instructions. */
28819 IX86_BUILTIN_RDSEED16_STEP,
28820 IX86_BUILTIN_RDSEED32_STEP,
28821 IX86_BUILTIN_RDSEED64_STEP,
28823 /* F16C instructions. */
28824 IX86_BUILTIN_CVTPH2PS,
28825 IX86_BUILTIN_CVTPH2PS256,
28826 IX86_BUILTIN_CVTPS2PH,
28827 IX86_BUILTIN_CVTPS2PH256,
28829 /* CFString built-in for Darwin. */
28830 IX86_BUILTIN_CFSTRING,
28832 /* Builtins to get CPU type and supported features. */
28833 IX86_BUILTIN_CPU_INIT,
28834 IX86_BUILTIN_CPU_IS,
28835 IX86_BUILTIN_CPU_SUPPORTS,
28837 /* Read/write FLAGS register built-ins. */
28838 IX86_BUILTIN_READ_FLAGS,
28839 IX86_BUILTIN_WRITE_FLAGS,
28841 IX86_BUILTIN_MAX
28842 };
28844 /* Table for the ix86 builtin decls. */
28845 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28847 /* Table of all of the builtin functions that are possible with different ISAs,
28848 but which are waiting to be built until a function is declared to use that
28849 ISA. */
28850 struct builtin_isa {
28851 const char *name; /* function name */
28852 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28853 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28854 bool const_p; /* true if the declaration is constant */
28855 bool set_and_not_built_p; /* true if recorded here but the decl is not yet built */
28856 };
28858 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
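/* Note: ix86_builtins and ix86_builtins_isa are parallel tables, both
   indexed by the enum ix86_builtins code; def_builtin below fills in one
   or the other depending on whether the builtin's ISA is currently
   enabled.  */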
28861 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save MASK,
28862 the set of isa_flags the builtin requires, in the ix86_builtins_isa array.
28863 Store the function decl in the ix86_builtins array. Return the function
28864 decl, or NULL_TREE if the builtin was not added.
28866 If the front end has a special hook for builtin functions, delay adding
28867 builtin functions that aren't in the current ISA until the ISA is changed
28868 with function-specific optimization. Doing so can save about 300K for the
28869 default compiler. When the builtin is expanded, check at that time whether
28870 it is valid.
28872 If the front end doesn't have a special hook, record all builtins, even
28873 those that aren't in the current ISA, in case the user uses
28874 function-specific options for a different ISA, so that we don't get scope
28875 errors if a builtin is added in the middle of a function scope. */
28877 static inline tree
28878 def_builtin (HOST_WIDE_INT mask, const char *name,
28879 enum ix86_builtin_func_type tcode,
28880 enum ix86_builtins code)
28881 {
28882 tree decl = NULL_TREE;
28884 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28885 {
28886 ix86_builtins_isa[(int) code].isa = mask;
28888 mask &= ~OPTION_MASK_ISA_64BIT;
28889 if (mask == 0
28890 || (mask & ix86_isa_flags) != 0
28891 || (lang_hooks.builtin_function
28892 == lang_hooks.builtin_function_ext_scope))
28894 {
28895 tree type = ix86_get_builtin_func_type (tcode);
28896 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28897 NULL, NULL_TREE);
28898 ix86_builtins[(int) code] = decl;
28899 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28900 }
28901 else
28902 {
28903 ix86_builtins[(int) code] = NULL_TREE;
28904 ix86_builtins_isa[(int) code].tcode = tcode;
28905 ix86_builtins_isa[(int) code].name = name;
28906 ix86_builtins_isa[(int) code].const_p = false;
28907 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28908 }
28909 }
28911 return decl;
28912 }
28914 /* Like def_builtin, but also marks the function decl "const". */
28916 static inline tree
28917 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28918 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28919 {
28920 tree decl = def_builtin (mask, name, tcode, code);
28921 if (decl)
28922 TREE_READONLY (decl) = 1;
28923 else
28924 ix86_builtins_isa[(int) code].const_p = true;
28926 return decl;
28927 }
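/* Illustrative sketch (not from the original file): how the two helpers
   above are typically used by the builtin-initialization code later in
   this file (e.g. ix86_init_mmx_sse_builtins).  The builtin name and enum
   code below are placeholders, not real entries:

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_example",
                        V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_EXAMPLE);

   If OPTION_MASK_ISA_SSE is not enabled in ix86_isa_flags at this point
   (and the front end lacks the ext-scope hook), def_builtin only records
   the request in ix86_builtins_isa[]; the decl is built later by
   ix86_add_new_builtins once the ISA becomes available.  */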
28929 /* Add any new builtin functions for a given ISA that may not have been
28930 declared. This saves a bit of space compared to adding all of the
28931 declarations to the tree up front, whether or not they end up being used. */
28933 static void
28934 ix86_add_new_builtins (HOST_WIDE_INT isa)
28935 {
28936 int i;
28938 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28939 {
28940 if ((ix86_builtins_isa[i].isa & isa) != 0
28941 && ix86_builtins_isa[i].set_and_not_built_p)
28942 {
28943 tree decl, type;
28945 /* Don't define the builtin again. */
28946 ix86_builtins_isa[i].set_and_not_built_p = false;
28948 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28949 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28950 type, i, BUILT_IN_MD, NULL,
28951 NULL_TREE);
28953 ix86_builtins[i] = decl;
28954 if (ix86_builtins_isa[i].const_p)
28955 TREE_READONLY (decl) = 1;
28956 }
28957 }
28958 }
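/* Note (assumption, not in the original source): this is the hook that
   turns deferred ix86_builtins_isa[] entries into real decls once extra
   ISA flags become enabled later, e.g. through the target attribute or
   pragma handling elsewhere in this file.  */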
28960 /* Bits for builtin_description.flag. */
28962 /* Set when we don't support the comparison natively, and should
28963 swap the comparison operands in order to support it. */
28964 #define BUILTIN_DESC_SWAP_OPERANDS 1
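/* Illustration: a comparison we lack natively can reuse a supported
   pattern with its operands exchanged.  bdesc_args below does this for
   __builtin_ia32_cmpgtps, which is listed with the LT comparison and a
   _SWAP function type so that a > b is emitted as b < a;
   BUILTIN_DESC_SWAP_OPERANDS records the same situation in the flag
   field of a builtin_description entry.  */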
28966 struct builtin_description
28967 {
28968 const HOST_WIDE_INT mask;
28969 const enum insn_code icode;
28970 const char *const name;
28971 const enum ix86_builtins code;
28972 const enum rtx_code comparison;
28973 const int flag;
28974 };
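/* Illustration: each entry in the bdesc_* tables below ties one
   __builtin_ia32_* name to an insn pattern and an IX86_BUILTIN_* code,
   guarded by an ISA option mask.  For example, the first bdesc_comi entry
   maps __builtin_ia32_comieq onto CODE_FOR_sse_comi with the UNEQ
   comparison under OPTION_MASK_ISA_SSE; in the later tables the flag
   field instead encodes the prototype, e.g. (int) V4SF_FTYPE_V4SF_V4SF.  */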
28976 static const struct builtin_description bdesc_comi[] =
28977 {
28978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29002 };
29004 static const struct builtin_description bdesc_pcmpestr[] =
29005 {
29006 /* SSE4.2 */
29007 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29008 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29009 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29010 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29014 };
29016 static const struct builtin_description bdesc_pcmpistr[] =
29017 {
29018 /* SSE4.2 */
29019 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29020 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29021 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29022 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29023 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29024 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29026 };
29028 /* Special builtins with a variable number of arguments. */
29029 static const struct builtin_description bdesc_special_args[] =
29030 {
29031 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29032 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29033 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29035 /* 80387 (for use internally for atomic compound assignment). */
29036 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29037 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29038 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
29039 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29041 /* MMX */
29042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29044 /* 3DNow! */
29045 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29047 /* FXSR, XSAVE and XSAVEOPT */
29048 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29049 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29050 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29051 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29052 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29054 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29055 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29056 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29057 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29058 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29060 /* SSE */
29061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29070 /* SSE or 3DNow!A */
29071 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29072 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29074 /* SSE2 */
29075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29082 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29089 /* SSE3 */
29090 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29092 /* SSE4.1 */
29093 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29095 /* SSE4A */
29096 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29097 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29099 /* AVX */
29100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29103 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29104 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29105 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29130 /* AVX2 */
29131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29141 /* AVX512F */
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29190 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29191 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29192 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29193 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29194 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29195 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29197 /* FSGSBASE */
29198 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29199 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29200 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29201 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29202 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29203 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29204 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29205 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29207 /* RTM */
29208 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29209 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29210 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29211 };
29213 /* Builtins with a variable number of arguments. */
29214 static const struct builtin_description bdesc_args[] =
29215 {
29216 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29217 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29218 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29219 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29220 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29221 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29222 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29224 /* MMX */
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29288 /* 3DNow! */
29289 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29290 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29291 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29292 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29294 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29295 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29296 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29297 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29298 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29299 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29300 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29301 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29302 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29303 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29304 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29305 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29306 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29307 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29308 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29310 /* 3DNow!A */
29311 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29312 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29313 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29314 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29315 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29316 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29318 /* SSE */
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29327 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29330 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
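  /* In the compare entries above only the comparison code differs between
     builtins sharing a maskcmp/vmmaskcmp pattern: the GT/GE forms reuse the
     LT/LE codes with the operands swapped (the *_SWAP prototypes), and the
     negated NLT/NLE/NGT/NGE forms use the unordered codes UNGE/UNGT.  */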
29364 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29365 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29369 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29371 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29372 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29374 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29379 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29380 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29384 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29386 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29392 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29393 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29395 /* SSE MMX or 3DNow!A */
29396 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29397 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29398 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29400 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29401 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29402 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29403 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29405 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29406 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29408 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29410 /* SSE2 */
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29429 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29430 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29542 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29547 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29552 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29553 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29554 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29555 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29556 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29557 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29560 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29561 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29562 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29563 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29564 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29567 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29568 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29570 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
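  /* Each SSE2 shift above has a scalar/immediate-count form (*_SI_COUNT) and
     a vector-count form (*_V*_COUNT), both expanding the same generic
     ashl/lshr/ashr pattern.  The whole-register byte shifts pslldqi128 and
     psrldqi128 instead go through the V1TI shift patterns, the *_INT_CONVERT
     prototypes indicating that the V2DI operands are reinterpreted in the
     pattern's mode.  */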
29572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29580 /* SSE2 MMX */
29581 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29582 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29584 /* SSE3 */
29585 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29586 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29588 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29589 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29590 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29591 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29592 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29593 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29595 /* SSSE3 */
29596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29605 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29612 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29613 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29614 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29615 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29616 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29617 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29618 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29619 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29620 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29621 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29622 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29623 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29624 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29625 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29626 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29628 /* SSSE3 (palignr) */
29629 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29630 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29632 /* SSE4.1 */
29633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29642 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29647 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29648 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29649 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29650 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29651 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29652 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29653 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29655 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29656 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29658 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29659 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29660 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29661 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29662 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29663 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29664 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29665 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29666 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29667 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29668 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29669 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29671 /* SSE4.1 (round, ptest) */
29672 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29673 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29674 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29675 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
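  /* The floor/ceil/trunc/rint entries below reuse the same sse4_1_roundpd
     and sse4_1_roundps patterns; the rounding-mode constant is carried in
     the comparison-code slot (cast from ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC
     or ROUND_MXCSR), and the *_ROUND prototypes tell the expander to emit it
     as the immediate operand.  */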
29677 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29678 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29679 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29680 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29682 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29683 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29685 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29686 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29688 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29689 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29690 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29691 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29693 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29694 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29696 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29697 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29699 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29700 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29701 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
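  /* The three ptest builtins above expand the same sse4_1_ptest pattern; the
     comparison code selects which PTEST result is returned: EQ for the ZF
     test (testz), LTU for the CF test (testc), and GTU for the
     neither-flag-set test (testnzc).  */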
29703 /* SSE4.2 */
29704 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29705 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29706 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29707 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29708 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29710 /* SSE4A */
29711 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29712 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29713 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29714 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29716 /* AES */
29717 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29718 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29720 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29721 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29722 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29723 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29725 /* PCLMUL */
29726 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29728 /* AVX */
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29831 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29841 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29842 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29843 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29864 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29865 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29867 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
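/* How to read the initializers in this table (the field order follows the
   builtin_description record defined earlier in this file); an informal
   decoding rather than authoritative documentation:

     { ISA option mask that must be enabled for the builtin to be defined,
       insn code of the pattern that implements it,
       user-visible builtin name,
       IX86_BUILTIN_* enumerator,
       sub-code -- UNKNOWN for most entries, a comparison code such as
       EQ/LTU/GTU for the ptest/vtest flag readers, or a ROUND_* value
       cast to rtx_code for the rounding builtins,
       function prototype enumerator cast to int }

   For example, the __builtin_ia32_cvtdq2pd256 row above says: when AVX is
   enabled, register that name as IX86_BUILTIN_CVTDQ2PD256, expand it
   through CODE_FOR_floatv4siv4df2, and give it the V4DF_FTYPE_V4SI
   signature (a v4df result from a v4si argument).  */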
29869 /* AVX2 */
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29984 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29994 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29996 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29998 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29999 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30001 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30002 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30006 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30008 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30009 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30015 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
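/* These descriptors only create the low-level __builtin_ia32_* functions;
   the user-facing intrinsics are thin inline wrappers in the intrinsic
   headers.  As a rough sketch (attribute decorations and header
   boilerplate omitted), avx2intrin.h exposes the PABSB256 entry above
   along these lines:

     extern __inline __m256i
     _mm256_abs_epi8 (__m256i __A)
     {
       return (__m256i) __builtin_ia32_pabsb256 ((__v32qi) __A);
     }

   so a row in this table is what ultimately gates the intrinsic on the
   corresponding -m option.  */

/* LZCNT */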
30017 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30019 /* BMI */
30020 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30021 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30022 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
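/* Naming note: most rows register target-specific __builtin_ia32_* names,
   but the 16-bit count builtins above use the shorter __builtin_clzs and
   __builtin_ctzs spellings.  They are gated like everything else (LZCNT
   and BMI respectively); user code normally reaches them through wrappers
   along the lines of __lzcnt16 / __tzcnt_u16 in lzcntintrin.h and
   bmiintrin.h rather than by calling the builtins directly.  */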
30024 /* TBM */
30025 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30026 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30028 /* F16C */
30029 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30030 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30031 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30032 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30034 /* BMI2 */
30035 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30036 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30037 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30038 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30039 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30040 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
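/* None of these rows does anything by itself; the table is walked once at
   builtin-initialization time.  Condensed sketch of that loop (based on
   ix86_init_mmx_sse_builtins later in this file; the other bdesc tables
   and special cases are omitted):

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)
           continue;
         ftype = (enum ix86_builtin_func_type) d->flag;
         def_builtin_const (d->mask, d->name, ftype, d->code);
       }

   Each entry thus becomes a const builtin gated by its ISA mask, with the
   prototype taken from the *_FTYPE_* enumerator in its last field.  */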
30042 /* AVX512F */
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30093 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30204 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30205 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30206 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30207 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30239 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
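/* Illustrative note: entries whose function type ends in a pass-through
   vector plus a mask, e.g. V16SI_FTYPE_V16SI_V16SI_V16SI_HI, back the
   merge-masking intrinsics.  A minimal sketch, assuming the usual
   avx512fintrin.h wrapper shape and naming, built on the
   __builtin_ia32_psubd512_mask entry above:

     __m512i
     _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
     {
       return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
                                                      (__v16si) __B,
                                                      (__v16si) __W,
                                                      (__mmask16) __U);
     }

   Lanes whose bit in __U is clear take the corresponding element of __W
   instead of the subtraction result.  */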
30244 /* Mask arithmetic operations */
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
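/* Illustrative note: the mask arithmetic entries above operate on whole
   __mmask16 values rather than vectors.  A minimal sketch of how the kandhi
   builtin is expected to surface as an intrinsic, assuming the usual
   avx512fintrin.h wrapper shape:

     __mmask16
     _mm512_kand (__mmask16 __A, __mmask16 __B)
     {
       return (__mmask16) __builtin_ia32_kandhi (__A, __B);
     }
   */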
30256 /* SHA */
30257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30262   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30263   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30264 };
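/* Illustrative note: the SHA entries above carry 0 in the name column, so
   presumably only the insn code and function type are taken from this table
   and the user-visible names are registered elsewhere in this file.  A
   sketch, assuming the usual shaintrin.h wrapper shape for the first entry:

     __m128i
     _mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
     {
       return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A, (__v4si) __B);
     }
   */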
30266 /* Builtins with rounding support. */
30267 static const struct builtin_description bdesc_round_args[] =
30268 {
30269 /* AVX512F */
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30291 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30298 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30300 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30350 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30352 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30354 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30356 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30358 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30360 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30362 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30364 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
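/* Illustrative note: in bdesc_round_args the trailing INT of each function
   type carries the rounding/SAE immediate, which the *_round intrinsics pass
   through verbatim.  A sketch of a call site, assuming the usual immintrin.h
   intrinsic and macro names and given __m512d a, b:

     __m512d r = _mm512_add_round_pd (a, b,
                                      _MM_FROUND_TO_NEAREST_INT
                                      | _MM_FROUND_NO_EXC);

   This is expected to reach __builtin_ia32_addpd512_mask with that value as
   its final argument.  */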
30390 /* AVX512ER */
30391 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30392 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30393 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30394 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30395 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30396 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30397 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30398 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30399 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30400   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30401 };
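/* Illustrative note: the AVX512ER entries provide the 2^-28 relative-accuracy
   exp2/rcp28/rsqrt28 approximations.  A sketch of a call site, assuming the
   usual avx512erintrin.h intrinsic name; it is expected to route through
   __builtin_ia32_rsqrt28ps_mask with a full mask and
   _MM_FROUND_CUR_DIRECTION:

     __m512 approx = _mm512_rsqrt28_ps (x);
   */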
30403 /* FMA4 and XOP. */
30404 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30405 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30406 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30407 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30408 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30409 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30410 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30411 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30412 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30413 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30414 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30415 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30416 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30417 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30418 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30419 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30420 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30421 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30422 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30423 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30424 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30425 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30426 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30427 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30428 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30429 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30430 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30431 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30432 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30433 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30434 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30435 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30436 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30437 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30438 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30439 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30440 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30441 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30442 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30443 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30444 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30445 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30446 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30447 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30448 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30449 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30450 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30451 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30452 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30453 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30454 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30455 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
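/* Illustrative note: the MULTI_ARG_* names above are local aliases for the
   shared *_FTYPE_* enums, grouped by argument count and element width so the
   bdesc_multi_arg table below stays compact.  For example, an entry tagged
   MULTI_ARG_3_SF describes a builtin with the schematic prototype (vector
   types written out purely for illustration):

     __v4sf __builtin_ia32_vfmaddss (__v4sf, __v4sf, __v4sf);
   */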
30457 static const struct builtin_description bdesc_multi_arg[] =
30458 {
30459 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30460 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30461 UNKNOWN, (int)MULTI_ARG_3_SF },
30462 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30463 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30464 UNKNOWN, (int)MULTI_ARG_3_DF },
30466 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30467 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30468 UNKNOWN, (int)MULTI_ARG_3_SF },
30469 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30470 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30471 UNKNOWN, (int)MULTI_ARG_3_DF },
30473 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30474 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30475 UNKNOWN, (int)MULTI_ARG_3_SF },
30476 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30477 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30478 UNKNOWN, (int)MULTI_ARG_3_DF },
30479 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30480 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30481 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30482 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30483 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30484 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30486 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30487 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30488 UNKNOWN, (int)MULTI_ARG_3_SF },
30489 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30490 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30491 UNKNOWN, (int)MULTI_ARG_3_DF },
30492 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30493 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30494 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30495 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30496 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30497 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
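/* A minimal usage sketch for the unsigned XOP compares registered just
   above: per MULTI_ARG_2_QI_CMP the builtin signature is
   V16QI (V16QI, V16QI).  The vector typedef and the helper name are
   illustrative assumptions; compile with -mxop.  */
#if 0
typedef char v16qi __attribute__ ((vector_size (16)));

static v16qi
bytes_below (v16qi a, v16qi b)
{
  /* Element-wise unsigned a < b; each byte lane is all-ones when true.  */
  return __builtin_ia32_vpcomltub (a, b);
}
#endif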
30659 /* TM vector builtins. */
30661 /* Reuse the existing x86-specific `struct builtin_description' because
30662    we're lazy.  Add casts to make the fields fit.  */
30663 static const struct builtin_description bdesc_tm[] =
30665 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30666 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30667 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30668 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30669 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30670 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30671 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30673 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30674 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30675 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30676 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30677 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30678 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30679 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30681 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30682 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30683 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30684 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30685 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30686 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30687 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30689 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30690 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30691 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
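/* The entries above register libitm's vector read/write/log barriers
   under a "__builtin_" prefix so that trans-mem lowering can emit them
   for 64/128/256-bit vector accesses inside transactions.  A hedged
   source-level sketch (hypothetical variable and helper; requires
   -fgnu-tm and -msse): the store below is lowered to roughly
   _ITM_WM128 (&shared, v) rather than a plain move.  */
#if 0
#include <xmmintrin.h>

static __m128 shared;

static void
publish (__m128 v)
{
  __transaction_atomic { shared = v; }
}
#endif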
30694 /* TM callbacks. */
30696 /* Return the builtin decl needed to load a vector of TYPE. */
30698 static tree
30699 ix86_builtin_tm_load (tree type)
30701 if (TREE_CODE (type) == VECTOR_TYPE)
30703 switch (tree_to_uhwi (TYPE_SIZE (type)))
30705 case 64:
30706 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30707 case 128:
30708 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30709 case 256:
30710 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30713 return NULL_TREE;
30716 /* Return the builtin decl needed to store a vector of TYPE. */
30718 static tree
30719 ix86_builtin_tm_store (tree type)
30721 if (TREE_CODE (type) == VECTOR_TYPE)
30723 switch (tree_to_uhwi (TYPE_SIZE (type)))
30725 case 64:
30726 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30727 case 128:
30728 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30729 case 256:
30730 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30733 return NULL_TREE;
30736 /* Initialize the transactional memory vector load/store builtins. */
30738 static void
30739 ix86_init_tm_builtins (void)
30741 enum ix86_builtin_func_type ftype;
30742 const struct builtin_description *d;
30743 size_t i;
30744 tree decl;
30745 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30746 tree attrs_log, attrs_type_log;
30748 if (!flag_tm)
30749 return;
30751 /* If there are no builtins defined, we must be compiling in a
30752 language without trans-mem support. */
30753 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30754 return;
30756 /* Use whatever attributes a normal TM load has. */
30757 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30758 attrs_load = DECL_ATTRIBUTES (decl);
30759 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30760 /* Use whatever attributes a normal TM store has. */
30761 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30762 attrs_store = DECL_ATTRIBUTES (decl);
30763 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30764 /* Use whatever attributes a normal TM log has. */
30765 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30766 attrs_log = DECL_ATTRIBUTES (decl);
30767 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30769 for (i = 0, d = bdesc_tm;
30770 i < ARRAY_SIZE (bdesc_tm);
30771 i++, d++)
30773 if ((d->mask & ix86_isa_flags) != 0
30774 || (lang_hooks.builtin_function
30775 == lang_hooks.builtin_function_ext_scope))
30777 tree type, attrs, attrs_type;
30778 enum built_in_function code = (enum built_in_function) d->code;
30780 ftype = (enum ix86_builtin_func_type) d->flag;
30781 type = ix86_get_builtin_func_type (ftype);
30783 if (BUILTIN_TM_LOAD_P (code))
30785 attrs = attrs_load;
30786 attrs_type = attrs_type_load;
30788 else if (BUILTIN_TM_STORE_P (code))
30790 attrs = attrs_store;
30791 attrs_type = attrs_type_store;
30793 else
30795 attrs = attrs_log;
30796 attrs_type = attrs_type_log;
30798 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30799 /* The builtin without the prefix for
30800 calling it directly. */
30801 d->name + strlen ("__builtin_"),
30802 attrs);
30803 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30804 set the TYPE_ATTRIBUTES. */
30805 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30807 set_builtin_decl (code, decl, false);
30812 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30813 in the current target ISA to allow the user to compile particular modules
30814 with different target specific options that differ from the command line
30815 options. */
30816 static void
30817 ix86_init_mmx_sse_builtins (void)
30819 const struct builtin_description * d;
30820 enum ix86_builtin_func_type ftype;
30821 size_t i;
30823 /* Add all special builtins with variable number of operands. */
30824 for (i = 0, d = bdesc_special_args;
30825 i < ARRAY_SIZE (bdesc_special_args);
30826 i++, d++)
30828 if (d->name == 0)
30829 continue;
30831 ftype = (enum ix86_builtin_func_type) d->flag;
30832 def_builtin (d->mask, d->name, ftype, d->code);
30835 /* Add all builtins with variable number of operands. */
30836 for (i = 0, d = bdesc_args;
30837 i < ARRAY_SIZE (bdesc_args);
30838 i++, d++)
30840 if (d->name == 0)
30841 continue;
30843 ftype = (enum ix86_builtin_func_type) d->flag;
30844 def_builtin_const (d->mask, d->name, ftype, d->code);
30847 /* Add all builtins with rounding. */
30848 for (i = 0, d = bdesc_round_args;
30849 i < ARRAY_SIZE (bdesc_round_args);
30850 i++, d++)
30852 if (d->name == 0)
30853 continue;
30855 ftype = (enum ix86_builtin_func_type) d->flag;
30856 def_builtin_const (d->mask, d->name, ftype, d->code);
30859 /* pcmpestr[im] insns. */
30860 for (i = 0, d = bdesc_pcmpestr;
30861 i < ARRAY_SIZE (bdesc_pcmpestr);
30862 i++, d++)
30864 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30865 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30866 else
30867 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30868 def_builtin_const (d->mask, d->name, ftype, d->code);
30871 /* pcmpistr[im] insns. */
30872 for (i = 0, d = bdesc_pcmpistr;
30873 i < ARRAY_SIZE (bdesc_pcmpistr);
30874 i++, d++)
30876 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30877 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30878 else
30879 ftype = INT_FTYPE_V16QI_V16QI_INT;
30880 def_builtin_const (d->mask, d->name, ftype, d->code);
30883 /* comi/ucomi insns. */
30884 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30886 if (d->mask == OPTION_MASK_ISA_SSE2)
30887 ftype = INT_FTYPE_V2DF_V2DF;
30888 else
30889 ftype = INT_FTYPE_V4SF_V4SF;
30890 def_builtin_const (d->mask, d->name, ftype, d->code);
30893 /* SSE */
30894 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30895 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30896 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30897 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30899 /* SSE or 3DNow!A */
30900 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30901 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30902 IX86_BUILTIN_MASKMOVQ);
30904 /* SSE2 */
30905 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30906 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30908 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30909 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30910 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30911 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30913 /* SSE3. */
30914 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30915 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30916 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30917 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30919 /* AES */
30920 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30921 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30922 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30923 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30924 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30925 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30926 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30927 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30928 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30929 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30930 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30931 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30933 /* PCLMUL */
30934 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30935 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30937 /* RDRND */
30938 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30939 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30940 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30941 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30942 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30943 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30944 IX86_BUILTIN_RDRAND64_STEP);
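/* A minimal usage sketch for the rdrand*_step builtins just defined:
   each returns nonzero on success and stores the random value through
   its pointer argument, mirroring RDRAND's carry-flag protocol.  The
   helper name is illustrative; compile with -mrdrnd.  */
#if 0
static unsigned int
hw_random_u32 (void)
{
  unsigned int val;
  /* RDRAND may transiently fail; retry until it reports success.  */
  while (!__builtin_ia32_rdrand32_step (&val))
    ;
  return val;
}
#endif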
30946 /* AVX2 */
30947 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30948 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30949 IX86_BUILTIN_GATHERSIV2DF);
30951 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30952 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30953 IX86_BUILTIN_GATHERSIV4DF);
30955 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30956 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30957 IX86_BUILTIN_GATHERDIV2DF);
30959 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30960 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30961 IX86_BUILTIN_GATHERDIV4DF);
30963 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30964 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30965 IX86_BUILTIN_GATHERSIV4SF);
30967 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30968 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30969 IX86_BUILTIN_GATHERSIV8SF);
30971 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30972 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30973 IX86_BUILTIN_GATHERDIV4SF);
30975 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30976 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30977 IX86_BUILTIN_GATHERDIV8SF);
30979 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30980 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30981 IX86_BUILTIN_GATHERSIV2DI);
30983 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30984 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30985 IX86_BUILTIN_GATHERSIV4DI);
30987 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30988 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30989 IX86_BUILTIN_GATHERDIV2DI);
30991 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30992 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30993 IX86_BUILTIN_GATHERDIV4DI);
30995 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30996 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30997 IX86_BUILTIN_GATHERSIV4SI);
30999 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31000 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31001 IX86_BUILTIN_GATHERSIV8SI);
31003 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31004 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31005 IX86_BUILTIN_GATHERDIV4SI);
31007 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31008 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31009 IX86_BUILTIN_GATHERDIV8SI);
31011 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31012 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31013 IX86_BUILTIN_GATHERALTSIV4DF);
31015 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31016 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31017 IX86_BUILTIN_GATHERALTDIV8SF);
31019 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31020 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31021 IX86_BUILTIN_GATHERALTSIV4DI);
31023 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31024 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31025 IX86_BUILTIN_GATHERALTDIV8SI);
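/* A hedged usage sketch for the AVX2 gathers defined above.  Per
   V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, the raw builtin takes a
   pass-through source, a base pointer, packed 32-bit indices, a
   per-element mask and a literal scale; user code normally reaches it
   through the <immintrin.h> wrapper shown below (assumed spelling
   _mm256_i32gather_pd; compile with -mavx2).  */
#if 0
#include <immintrin.h>

static __m256d
gather_four_doubles (const double *base, __m128i idx)
{
  /* Load base[idx[0..3]]; scale 8 = sizeof (double).  */
  return _mm256_i32gather_pd (base, idx, 8);
}
#endif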
31027 /* AVX512F */
31028 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31029 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31030 IX86_BUILTIN_GATHER3SIV16SF);
31032 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31033 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31034 IX86_BUILTIN_GATHER3SIV8DF);
31036 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31037 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31038 IX86_BUILTIN_GATHER3DIV16SF);
31040 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31041 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31042 IX86_BUILTIN_GATHER3DIV8DF);
31044 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31045 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31046 IX86_BUILTIN_GATHER3SIV16SI);
31048 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31049 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31050 IX86_BUILTIN_GATHER3SIV8DI);
31052 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31053 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31054 IX86_BUILTIN_GATHER3DIV16SI);
31056 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31057 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31058 IX86_BUILTIN_GATHER3DIV8DI);
31060 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31061 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31062 IX86_BUILTIN_GATHER3ALTSIV8DF);
31064 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31065 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31066 IX86_BUILTIN_GATHER3ALTDIV16SF);
31068 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31069 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31070 IX86_BUILTIN_GATHER3ALTSIV8DI);
31072 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31073 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31074 IX86_BUILTIN_GATHER3ALTDIV16SI);
31076 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31077 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31078 IX86_BUILTIN_SCATTERSIV16SF);
31080 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31081 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31082 IX86_BUILTIN_SCATTERSIV8DF);
31084 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31085 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31086 IX86_BUILTIN_SCATTERDIV16SF);
31088 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31089 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31090 IX86_BUILTIN_SCATTERDIV8DF);
31092 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31093 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31094 IX86_BUILTIN_SCATTERSIV16SI);
31096 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31097 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31098 IX86_BUILTIN_SCATTERSIV8DI);
31100 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31101 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31102 IX86_BUILTIN_SCATTERDIV16SI);
31104 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31105 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31106 IX86_BUILTIN_SCATTERDIV8DI);
31108 /* AVX512PF */
31109 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31110 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31111 IX86_BUILTIN_GATHERPFDPD);
31112 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31113 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31114 IX86_BUILTIN_GATHERPFDPS);
31115 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31116 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31117 IX86_BUILTIN_GATHERPFQPD);
31118 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31119 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31120 IX86_BUILTIN_GATHERPFQPS);
31121 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31122 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31123 IX86_BUILTIN_SCATTERPFDPD);
31124 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31125 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31126 IX86_BUILTIN_SCATTERPFDPS);
31127 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31128 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31129 IX86_BUILTIN_SCATTERPFQPD);
31130 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31131 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31132 IX86_BUILTIN_SCATTERPFQPS);
31134 /* SHA */
31135 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31136 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31137 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31138 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31139 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31140 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31141 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31142 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31143 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31144 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31145 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31146 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31147 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31148 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
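/* A hedged usage sketch for the SHA builtins above: they operate on
   V4SI vectors, with sha1rnds4 taking an extra immediate selecting the
   round function.  The wrapper name _mm_sha1rnds4_epu32 is the assumed
   <immintrin.h> spelling; compile with -msha.  */
#if 0
#include <immintrin.h>

static __m128i
sha1_four_rounds (__m128i abcd, __m128i msg)
{
  /* Perform four SHA1 rounds using round function 0.  */
  return _mm_sha1rnds4_epu32 (abcd, msg, 0);
}
#endif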
31150 /* RTM. */
31151 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31152 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31154 /* MMX access to the vec_init patterns. */
31155 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31156 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31158 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31159 V4HI_FTYPE_HI_HI_HI_HI,
31160 IX86_BUILTIN_VEC_INIT_V4HI);
31162 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31163 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31164 IX86_BUILTIN_VEC_INIT_V8QI);
31166 /* Access to the vec_extract patterns. */
31167 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31168 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31169 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31170 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31171 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31172 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31173 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31174 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31175 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31176 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31178 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31179 "__builtin_ia32_vec_ext_v4hi",
31180 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31182 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31183 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31185 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31186 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31188 /* Access to the vec_set patterns. */
31189 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31190 "__builtin_ia32_vec_set_v2di",
31191 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31193 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31194 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31196 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31197 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31199 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31200 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31202 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31203 "__builtin_ia32_vec_set_v4hi",
31204 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31206 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31207 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31209 /* RDSEED */
31210 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31211 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31212 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31213 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31214 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31215 "__builtin_ia32_rdseed_di_step",
31216 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31218 /* ADCX */
31219 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31220 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31221 def_builtin (OPTION_MASK_ISA_64BIT,
31222 "__builtin_ia32_addcarryx_u64",
31223 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31224 IX86_BUILTIN_ADDCARRYX64);
31226 /* Read/write FLAGS. */
31227 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31228 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31229 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31230 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31231 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31232 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31233 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31234 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
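/* A minimal usage sketch for the EFLAGS builtins just defined; the
   32-bit pair is only registered for -m32 (~OPTION_MASK_ISA_64BIT) and
   the 64-bit pair for -m64.  The helper name is illustrative.  */
#if 0
static unsigned long long
flags_with_cf_set (void)
{
  unsigned long long flags = __builtin_ia32_readeflags_u64 ();
  __builtin_ia32_writeeflags_u64 (flags | 1);   /* Bit 0 is CF.  */
  return __builtin_ia32_readeflags_u64 ();
}
#endif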
31237 /* Add FMA4 multi-argument instructions.  */
31238 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31240 if (d->name == 0)
31241 continue;
31243 ftype = (enum ix86_builtin_func_type) d->flag;
31244 def_builtin_const (d->mask, d->name, ftype, d->code);
31248 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31249 to return a pointer to VERSION_DECL if the outcome of the expression
31250 formed by PREDICATE_CHAIN is true. This function will be called during
31251 version dispatch to decide which function version to execute. It returns
31252 the basic block at the end, to which more conditions can be added. */
31254 static basic_block
31255 add_condition_to_bb (tree function_decl, tree version_decl,
31256 tree predicate_chain, basic_block new_bb)
31258 gimple return_stmt;
31259 tree convert_expr, result_var;
31260 gimple convert_stmt;
31261 gimple call_cond_stmt;
31262 gimple if_else_stmt;
31264 basic_block bb1, bb2, bb3;
31265 edge e12, e23;
31267 tree cond_var, and_expr_var = NULL_TREE;
31268 gimple_seq gseq;
31270 tree predicate_decl, predicate_arg;
31272 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31274 gcc_assert (new_bb != NULL);
31275 gseq = bb_seq (new_bb);
31278 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31279 build_fold_addr_expr (version_decl));
31280 result_var = create_tmp_var (ptr_type_node, NULL);
31281 convert_stmt = gimple_build_assign (result_var, convert_expr);
31282 return_stmt = gimple_build_return (result_var);
31284 if (predicate_chain == NULL_TREE)
31286 gimple_seq_add_stmt (&gseq, convert_stmt);
31287 gimple_seq_add_stmt (&gseq, return_stmt);
31288 set_bb_seq (new_bb, gseq);
31289 gimple_set_bb (convert_stmt, new_bb);
31290 gimple_set_bb (return_stmt, new_bb);
31291 pop_cfun ();
31292 return new_bb;
31295 while (predicate_chain != NULL)
31297 cond_var = create_tmp_var (integer_type_node, NULL);
31298 predicate_decl = TREE_PURPOSE (predicate_chain);
31299 predicate_arg = TREE_VALUE (predicate_chain);
31300 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31301 gimple_call_set_lhs (call_cond_stmt, cond_var);
31303 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31304 gimple_set_bb (call_cond_stmt, new_bb);
31305 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31307 predicate_chain = TREE_CHAIN (predicate_chain);
31309 if (and_expr_var == NULL)
31310 and_expr_var = cond_var;
31311 else
31313 gimple assign_stmt;
31314 /* Use MIN_EXPR to check whether any condition evaluated to zero:
31315    and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
31316 assign_stmt = gimple_build_assign (and_expr_var,
31317 build2 (MIN_EXPR, integer_type_node,
31318 cond_var, and_expr_var));
31320 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31321 gimple_set_bb (assign_stmt, new_bb);
31322 gimple_seq_add_stmt (&gseq, assign_stmt);
31326 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31327 integer_zero_node,
31328 NULL_TREE, NULL_TREE);
31329 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31330 gimple_set_bb (if_else_stmt, new_bb);
31331 gimple_seq_add_stmt (&gseq, if_else_stmt);
31333 gimple_seq_add_stmt (&gseq, convert_stmt);
31334 gimple_seq_add_stmt (&gseq, return_stmt);
31335 set_bb_seq (new_bb, gseq);
31337 bb1 = new_bb;
31338 e12 = split_block (bb1, if_else_stmt);
31339 bb2 = e12->dest;
31340 e12->flags &= ~EDGE_FALLTHRU;
31341 e12->flags |= EDGE_TRUE_VALUE;
31343 e23 = split_block (bb2, return_stmt);
31345 gimple_set_bb (convert_stmt, bb2);
31346 gimple_set_bb (return_stmt, bb2);
31348 bb3 = e23->dest;
31349 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31351 remove_edge (e23);
31352 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31354 pop_cfun ();
31356 return bb3;
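/* Sketch of the code this routine appends for one non-default version
   (names are illustrative; the predicate calls come from the
   PREDICATE_CHAIN built by get_builtin_code_for_version below):

     cond_1 = __builtin_cpu_is ("core2");
     cond_2 = __builtin_cpu_supports ("sse4.2");
     and_tmp = MIN_EXPR <cond_2, cond_1>;
     if (and_tmp > 0)
       return (void *) &foo.arch_core2_sse4.2;
     ... fall through to the test for the next version ...  */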
31359 /* This parses the attribute arguments to target in DECL and determines
31360 the right builtin to use to match the platform specification.
31361 It returns the priority value for this version decl. If PREDICATE_LIST
31362 is not NULL, it stores the list of cpu features that need to be checked
31363 before dispatching this function. */
31365 static unsigned int
31366 get_builtin_code_for_version (tree decl, tree *predicate_list)
31368 tree attrs;
31369 struct cl_target_option cur_target;
31370 tree target_node;
31371 struct cl_target_option *new_target;
31372 const char *arg_str = NULL;
31373 const char *attrs_str = NULL;
31374 char *tok_str = NULL;
31375 char *token;
31377 /* Priority of i386 features, greater value is higher priority. This is
31378 used to decide the order in which function dispatch must happen. For
31379 instance, a version specialized for SSE4.2 should be checked for dispatch
31380 before a version for SSE3, as SSE4.2 implies SSE3. */
31381 enum feature_priority
31383 P_ZERO = 0,
31384 P_MMX,
31385 P_SSE,
31386 P_SSE2,
31387 P_SSE3,
31388 P_SSSE3,
31389 P_PROC_SSSE3,
31390 P_SSE4_A,
31391 P_PROC_SSE4_A,
31392 P_SSE4_1,
31393 P_SSE4_2,
31394 P_PROC_SSE4_2,
31395 P_POPCNT,
31396 P_AVX,
31397 P_PROC_AVX,
31398 P_FMA4,
31399 P_XOP,
31400 P_PROC_XOP,
31401 P_FMA,
31402 P_PROC_FMA,
31403 P_AVX2,
31404 P_PROC_AVX2
31407 enum feature_priority priority = P_ZERO;
31409 /* These are the target attribute strings for which a dispatcher is
31410 available, from fold_builtin_cpu. */
31412 static struct _feature_list
31414 const char *const name;
31415 const enum feature_priority priority;
31417 const feature_list[] =
31419 {"mmx", P_MMX},
31420 {"sse", P_SSE},
31421 {"sse2", P_SSE2},
31422 {"sse3", P_SSE3},
31423 {"sse4a", P_SSE4_A},
31424 {"ssse3", P_SSSE3},
31425 {"sse4.1", P_SSE4_1},
31426 {"sse4.2", P_SSE4_2},
31427 {"popcnt", P_POPCNT},
31428 {"avx", P_AVX},
31429 {"fma4", P_FMA4},
31430 {"xop", P_XOP},
31431 {"fma", P_FMA},
31432 {"avx2", P_AVX2}
31436 static unsigned int NUM_FEATURES
31437 = sizeof (feature_list) / sizeof (struct _feature_list);
31439 unsigned int i;
31441 tree predicate_chain = NULL_TREE;
31442 tree predicate_decl, predicate_arg;
31444 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31445 gcc_assert (attrs != NULL);
31447 attrs = TREE_VALUE (TREE_VALUE (attrs));
31449 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31450 attrs_str = TREE_STRING_POINTER (attrs);
31452 /* Return priority zero for default function. */
31453 if (strcmp (attrs_str, "default") == 0)
31454 return 0;
31456 /* Handle arch= if specified. For priority, set it to be 1 more than
31457 the best instruction set the processor can handle. For instance, if
31458 there is a version for atom and a version for ssse3 (the highest ISA
31459 priority for atom), the atom version must be checked for dispatch
31460 before the ssse3 version. */
31461 if (strstr (attrs_str, "arch=") != NULL)
31463 cl_target_option_save (&cur_target, &global_options);
31464 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31465 &global_options_set);
31467 gcc_assert (target_node);
31468 new_target = TREE_TARGET_OPTION (target_node);
31469 gcc_assert (new_target);
31471 if (new_target->arch_specified && new_target->arch > 0)
31473 switch (new_target->arch)
31475 case PROCESSOR_CORE2:
31476 arg_str = "core2";
31477 priority = P_PROC_SSSE3;
31478 break;
31479 case PROCESSOR_NEHALEM:
31480 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31481 arg_str = "westmere";
31482 else
31483 /* We translate "arch=corei7" and "arch=nehalem" to
31484 "corei7" so that it will be mapped to M_INTEL_COREI7
31485 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31486 arg_str = "corei7";
31487 priority = P_PROC_SSE4_2;
31488 break;
31489 case PROCESSOR_SANDYBRIDGE:
31490 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31491 arg_str = "ivybridge";
31492 else
31493 arg_str = "sandybridge";
31494 priority = P_PROC_AVX;
31495 break;
31496 case PROCESSOR_HASWELL:
31497 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31498 arg_str = "broadwell";
31499 else
31500 arg_str = "haswell";
31501 priority = P_PROC_AVX2;
31502 break;
31503 case PROCESSOR_BONNELL:
31504 arg_str = "bonnell";
31505 priority = P_PROC_SSSE3;
31506 break;
31507 case PROCESSOR_SILVERMONT:
31508 arg_str = "silvermont";
31509 priority = P_PROC_SSE4_2;
31510 break;
31511 case PROCESSOR_AMDFAM10:
31512 arg_str = "amdfam10h";
31513 priority = P_PROC_SSE4_A;
31514 break;
31515 case PROCESSOR_BTVER1:
31516 arg_str = "btver1";
31517 priority = P_PROC_SSE4_A;
31518 break;
31519 case PROCESSOR_BTVER2:
31520 arg_str = "btver2";
31521 priority = P_PROC_AVX;
31522 break;
31523 case PROCESSOR_BDVER1:
31524 arg_str = "bdver1";
31525 priority = P_PROC_XOP;
31526 break;
31527 case PROCESSOR_BDVER2:
31528 arg_str = "bdver2";
31529 priority = P_PROC_FMA;
31530 break;
31531 case PROCESSOR_BDVER3:
31532 arg_str = "bdver3";
31533 priority = P_PROC_FMA;
31534 break;
31535 case PROCESSOR_BDVER4:
31536 arg_str = "bdver4";
31537 priority = P_PROC_AVX2;
31538 break;
31542 cl_target_option_restore (&global_options, &cur_target);
31544 if (predicate_list && arg_str == NULL)
31546 error_at (DECL_SOURCE_LOCATION (decl),
31547 "No dispatcher found for the versioning attributes");
31548 return 0;
31551 if (predicate_list)
31553 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31554 /* For a C string literal the length includes the trailing NULL. */
31555 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31556 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31557 predicate_chain);
31561 /* Process feature name. */
31562 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31563 strcpy (tok_str, attrs_str);
31564 token = strtok (tok_str, ",");
31565 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31567 while (token != NULL)
31569 /* Do not process "arch=" */
31570 if (strncmp (token, "arch=", 5) == 0)
31572 token = strtok (NULL, ",");
31573 continue;
31575 for (i = 0; i < NUM_FEATURES; ++i)
31577 if (strcmp (token, feature_list[i].name) == 0)
31579 if (predicate_list)
31581 predicate_arg = build_string_literal (
31582 strlen (feature_list[i].name) + 1,
31583 feature_list[i].name);
31584 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31585 predicate_chain);
31587 /* Find the maximum priority feature. */
31588 if (feature_list[i].priority > priority)
31589 priority = feature_list[i].priority;
31591 break;
31594 if (predicate_list && i == NUM_FEATURES)
31596 error_at (DECL_SOURCE_LOCATION (decl),
31597 "No dispatcher found for %s", token);
31598 return 0;
31600 token = strtok (NULL, ",");
31602 free (tok_str);
31604 if (predicate_list && predicate_chain == NULL_TREE)
31606 error_at (DECL_SOURCE_LOCATION (decl),
31607 "No dispatcher found for the versioning attributes : %s",
31608 attrs_str);
31609 return 0;
31611 else if (predicate_list)
31613 predicate_chain = nreverse (predicate_chain);
31614 *predicate_list = predicate_chain;
31617 return priority;
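/* Worked example (hypothetical declaration): for a version declared as

     __attribute__ ((target ("arch=core2,sse4.2"))) int foo (void);

   this routine returns P_SSE4_2 (the maximum of P_PROC_SSSE3 from
   "arch=core2" and P_SSE4_2 from "sse4.2") and, when PREDICATE_LIST is
   non-NULL, a chain pairing __builtin_cpu_is ("core2") with
   __builtin_cpu_supports ("sse4.2") for add_condition_to_bb to test.  */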
31620 /* This compares the priority of target features in function DECL1
31621 and DECL2. It returns positive value if DECL1 is higher priority,
31622 negative value if DECL2 is higher priority and 0 if they are the
31623 same. */
31625 static int
31626 ix86_compare_version_priority (tree decl1, tree decl2)
31628 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31629 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31631 return (int)priority1 - (int)priority2;
31634 /* V1 and V2 point to function versions with different priorities
31635 based on the target ISA. This function compares their priorities. */
31637 static int
31638 feature_compare (const void *v1, const void *v2)
31640 typedef struct _function_version_info
31642 tree version_decl;
31643 tree predicate_chain;
31644 unsigned int dispatch_priority;
31645 } function_version_info;
31647 const function_version_info c1 = *(const function_version_info *)v1;
31648 const function_version_info c2 = *(const function_version_info *)v2;
31649 return (c2.dispatch_priority - c1.dispatch_priority);
31652 /* This function generates the dispatch function for
31653 multi-versioned functions. DISPATCH_DECL is the function which will
31654 contain the dispatch logic. FNDECLS are the function choices for
31655 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31656 in DISPATCH_DECL in which the dispatch code is generated. */
31658 static int
31659 dispatch_function_versions (tree dispatch_decl,
31660 void *fndecls_p,
31661 basic_block *empty_bb)
31663 tree default_decl;
31664 gimple ifunc_cpu_init_stmt;
31665 gimple_seq gseq;
31666 int ix;
31667 tree ele;
31668 vec<tree> *fndecls;
31669 unsigned int num_versions = 0;
31670 unsigned int actual_versions = 0;
31671 unsigned int i;
31673 struct _function_version_info
31675 tree version_decl;
31676 tree predicate_chain;
31677 unsigned int dispatch_priority;
31678 }*function_version_info;
31680 gcc_assert (dispatch_decl != NULL
31681 && fndecls_p != NULL
31682 && empty_bb != NULL);
31684 /* fndecls_p is actually a vector.  */
31685 fndecls = static_cast<vec<tree> *> (fndecls_p);
31687 /* At least one more version other than the default. */
31688 num_versions = fndecls->length ();
31689 gcc_assert (num_versions >= 2);
31691 function_version_info = (struct _function_version_info *)
31692 XNEWVEC (struct _function_version_info, (num_versions - 1));
31694 /* The first version in the vector is the default decl. */
31695 default_decl = (*fndecls)[0];
31697 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31699 gseq = bb_seq (*empty_bb);
31700 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31701 constructors, so explicitly call __builtin_cpu_init here. */
31702 ifunc_cpu_init_stmt = gimple_build_call_vec (
31703 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31704 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31705 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31706 set_bb_seq (*empty_bb, gseq);
31708 pop_cfun ();
31711 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31713 tree version_decl = ele;
31714 tree predicate_chain = NULL_TREE;
31715 unsigned int priority;
31716 /* Get attribute string, parse it and find the right predicate decl.
31717 The predicate function could be a lengthy combination of many
31718 features, like arch-type and various isa-variants. */
31719 priority = get_builtin_code_for_version (version_decl,
31720 &predicate_chain);
31722 if (predicate_chain == NULL_TREE)
31723 continue;
31725 function_version_info [actual_versions].version_decl = version_decl;
31726 function_version_info [actual_versions].predicate_chain
31727 = predicate_chain;
31728 function_version_info [actual_versions].dispatch_priority = priority;
31729 actual_versions++;
31732 /* Sort the versions according to descending order of dispatch priority. The
31733 priority is based on the ISA. This is not a perfect solution. There
31734 could still be ambiguity. If more than one function version is suitable
31735 to execute, which one should be dispatched? In future, allow the user
31736 to specify a dispatch priority next to the version. */
31737 qsort (function_version_info, actual_versions,
31738 sizeof (struct _function_version_info), feature_compare);
31740 for (i = 0; i < actual_versions; ++i)
31741 *empty_bb = add_condition_to_bb (dispatch_decl,
31742 function_version_info[i].version_decl,
31743 function_version_info[i].predicate_chain,
31744 *empty_bb);
31746 /* dispatch default version at the end. */
31747 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31748 NULL, *empty_bb);
31750 free (function_version_info);
31751 return 0;
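/* Source-level sketch of what this dispatcher serves (hypothetical
   function; in this release the multiversioning syntax is accepted by
   the C++ front end):

     __attribute__ ((target ("default"))) int foo () { return 0; }
     __attribute__ ((target ("sse4.2"))) int foo () { return 1; }
     __attribute__ ((target ("arch=core2"))) int foo () { return 2; }

   Calls to foo go through an IFUNC whose resolver body is generated
   here: the versions are tested in descending priority order and the
   "default" definition is returned when no predicate matches.  */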
31754 /* Comparator function to be used in qsort routine to sort attribute
31755 specification strings to "target". */
31757 static int
31758 attr_strcmp (const void *v1, const void *v2)
31760 const char *c1 = *(char *const*)v1;
31761 const char *c2 = *(char *const*)v2;
31762 return strcmp (c1, c2);
31765 /* ARGLIST is the argument to target attribute. This function tokenizes
31766 the comma separated arguments, sorts them and returns a string which
31767 is a unique identifier for the comma separated arguments. It also
31768 replaces non-identifier characters "=,-" with "_". */
31770 static char *
31771 sorted_attr_string (tree arglist)
31773 tree arg;
31774 size_t str_len_sum = 0;
31775 char **args = NULL;
31776 char *attr_str, *ret_str;
31777 char *attr = NULL;
31778 unsigned int argnum = 1;
31779 unsigned int i;
31781 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31783 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31784 size_t len = strlen (str);
31785 str_len_sum += len + 1;
31786 if (arg != arglist)
31787 argnum++;
31788 for (i = 0; i < strlen (str); i++)
31789 if (str[i] == ',')
31790 argnum++;
31793 attr_str = XNEWVEC (char, str_len_sum);
31794 str_len_sum = 0;
31795 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31797 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31798 size_t len = strlen (str);
31799 memcpy (attr_str + str_len_sum, str, len);
31800 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31801 str_len_sum += len + 1;
31804 /* Replace "=,-" with "_". */
31805 for (i = 0; i < strlen (attr_str); i++)
31806 if (attr_str[i] == '=' || attr_str[i]== '-')
31807 attr_str[i] = '_';
31809 if (argnum == 1)
31810 return attr_str;
31812 args = XNEWVEC (char *, argnum);
31814 i = 0;
31815 attr = strtok (attr_str, ",");
31816 while (attr != NULL)
31818 args[i] = attr;
31819 i++;
31820 attr = strtok (NULL, ",");
31823 qsort (args, argnum, sizeof (char *), attr_strcmp);
31825 ret_str = XNEWVEC (char, str_len_sum);
31826 str_len_sum = 0;
31827 for (i = 0; i < argnum; i++)
31829 size_t len = strlen (args[i]);
31830 memcpy (ret_str + str_len_sum, args[i], len);
31831 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31832 str_len_sum += len + 1;
31835 XDELETEVEC (args);
31836 XDELETEVEC (attr_str);
31837 return ret_str;
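/* Worked example (hypothetical input): for target ("sse4.2,arch=core2")
   the joined string is "sse4.2,arch=core2"; '=' and '-' become '_', the
   comma-separated tokens are sorted, and the result is
   "arch_core2_sse4.2" (the '.' is left untouched).  */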
31840 /* This function changes the assembler name for functions that are
31841 versions. If DECL is a function version and has a "target"
31842 attribute, it appends the attribute string to its assembler name. */
31844 static tree
31845 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31847 tree version_attr;
31848 const char *orig_name, *version_string;
31849 char *attr_str, *assembler_name;
31851 if (DECL_DECLARED_INLINE_P (decl)
31852 && lookup_attribute ("gnu_inline",
31853 DECL_ATTRIBUTES (decl)))
31854 error_at (DECL_SOURCE_LOCATION (decl),
31855 "Function versions cannot be marked as gnu_inline,"
31856 " bodies have to be generated");
31858 if (DECL_VIRTUAL_P (decl)
31859 || DECL_VINDEX (decl))
31860 sorry ("Virtual function multiversioning not supported");
31862 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31864 /* target attribute string cannot be NULL. */
31865 gcc_assert (version_attr != NULL_TREE);
31867 orig_name = IDENTIFIER_POINTER (id);
31868 version_string
31869 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31871 if (strcmp (version_string, "default") == 0)
31872 return id;
31874 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31875 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31877 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31879 /* Allow assembler name to be modified if already set. */
31880 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31881 SET_DECL_RTL (decl, NULL);
31883 tree ret = get_identifier (assembler_name);
31884 XDELETEVEC (attr_str);
31885 XDELETEVEC (assembler_name);
31886 return ret;
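/* Worked example (hypothetical version): combined with
   sorted_attr_string above, a C++ version

     __attribute__ ((target ("sse4.2,arch=core2"))) int foo ();

   whose default assembler name is "_Z3foov" is emitted as
   "_Z3foov.arch_core2_sse4.2", while the "default"-targeted version
   keeps the plain name.  */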
31889 /* This function returns true if FN1 and FN2 are versions of the same function,
31890 that is, the target strings of the function decls are different. This assumes
31891 that FN1 and FN2 have the same signature. */
31893 static bool
31894 ix86_function_versions (tree fn1, tree fn2)
31896 tree attr1, attr2;
31897 char *target1, *target2;
31898 bool result;
31900 if (TREE_CODE (fn1) != FUNCTION_DECL
31901 || TREE_CODE (fn2) != FUNCTION_DECL)
31902 return false;
31904 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31905 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31907 /* At least one function decl should have the target attribute specified. */
31908 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31909 return false;
31911 /* Diagnose missing target attribute if one of the decls is already
31912 multi-versioned. */
31913 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31915 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31917 if (attr2 != NULL_TREE)
31919 tree tem = fn1;
31920 fn1 = fn2;
31921 fn2 = tem;
31922 attr1 = attr2;
31924 error_at (DECL_SOURCE_LOCATION (fn2),
31925 "missing %<target%> attribute for multi-versioned %D",
31926 fn2);
31927 inform (DECL_SOURCE_LOCATION (fn1),
31928 "previous declaration of %D", fn1);
31929 /* Prevent diagnosing of the same error multiple times. */
31930 DECL_ATTRIBUTES (fn2)
31931 = tree_cons (get_identifier ("target"),
31932 copy_node (TREE_VALUE (attr1)),
31933 DECL_ATTRIBUTES (fn2));
31935 return false;
31938 target1 = sorted_attr_string (TREE_VALUE (attr1));
31939 target2 = sorted_attr_string (TREE_VALUE (attr2));
31941 /* The sorted target strings must be different for fn1 and fn2
31942 to be versions. */
31943 if (strcmp (target1, target2) == 0)
31944 result = false;
31945 else
31946 result = true;
31948 XDELETEVEC (target1);
31949 XDELETEVEC (target2);
31951 return result;
31954 static tree
31955 ix86_mangle_decl_assembler_name (tree decl, tree id)
31957 /* For function version, add the target suffix to the assembler name. */
31958 if (TREE_CODE (decl) == FUNCTION_DECL
31959 && DECL_FUNCTION_VERSIONED (decl))
31960 id = ix86_mangle_function_version_assembler_name (decl, id);
31961 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31962 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31963 #endif
31965 return id;
31968 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31969 is true, append the full path name of the source file. */
31971 static char *
31972 make_name (tree decl, const char *suffix, bool make_unique)
31974 char *global_var_name;
31975 int name_len;
31976 const char *name;
31977 const char *unique_name = NULL;
31979 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31981 /* Get a unique name that can be used globally without any chances
31982 of collision at link time. */
31983 if (make_unique)
31984 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31986 name_len = strlen (name) + strlen (suffix) + 2;
31988 if (make_unique)
31989 name_len += strlen (unique_name) + 1;
31990 global_var_name = XNEWVEC (char, name_len);
31992 /* Use '.' to concatenate names as it is demangler friendly. */
31993 if (make_unique)
31994 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31995 suffix);
31996 else
31997 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31999 return global_var_name;
32002 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32004 /* Make a dispatcher declaration for the multi-versioned function DECL.
32005 Calls to DECL function will be replaced with calls to the dispatcher
32006 by the front-end. Return the decl created. */
32008 static tree
32009 make_dispatcher_decl (const tree decl)
32011 tree func_decl;
32012 char *func_name;
32013 tree fn_type, func_type;
32014 bool is_uniq = false;
32016 if (TREE_PUBLIC (decl) == 0)
32017 is_uniq = true;
32019 func_name = make_name (decl, "ifunc", is_uniq);
32021 fn_type = TREE_TYPE (decl);
32022 func_type = build_function_type (TREE_TYPE (fn_type),
32023 TYPE_ARG_TYPES (fn_type));
32025 func_decl = build_fn_decl (func_name, func_type);
32026 XDELETEVEC (func_name);
32027 TREE_USED (func_decl) = 1;
32028 DECL_CONTEXT (func_decl) = NULL_TREE;
32029 DECL_INITIAL (func_decl) = error_mark_node;
32030 DECL_ARTIFICIAL (func_decl) = 1;
32031 /* Mark this func as external; the resolver will flip it again if
32032    it gets generated.  */
32033 DECL_EXTERNAL (func_decl) = 1;
32034 /* IFUNCs have to be externally visible, so make the dispatcher public.  */
32035 TREE_PUBLIC (func_decl) = 1;
32037 return func_decl;
32040 #endif
32042 /* Returns true if DECL is multi-versioned and is the default version,
32043    that is, it is not tagged with a target-specific attribute string.  */
32045 static bool
32046 is_function_default_version (const tree decl)
32048 if (TREE_CODE (decl) != FUNCTION_DECL
32049 || !DECL_FUNCTION_VERSIONED (decl))
32050 return false;
32051 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32052 gcc_assert (attr);
32053 attr = TREE_VALUE (TREE_VALUE (attr));
32054 return (TREE_CODE (attr) == STRING_CST
32055 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32058 /* Make a dispatcher declaration for the multi-versioned function DECL.
32059 Calls to DECL function will be replaced with calls to the dispatcher
32060 by the front-end. Returns the decl of the dispatcher function. */
32062 static tree
32063 ix86_get_function_versions_dispatcher (void *decl)
32065 tree fn = (tree) decl;
32066 struct cgraph_node *node = NULL;
32067 struct cgraph_node *default_node = NULL;
32068 struct cgraph_function_version_info *node_v = NULL;
32069 struct cgraph_function_version_info *first_v = NULL;
32071 tree dispatch_decl = NULL;
32073 struct cgraph_function_version_info *default_version_info = NULL;
32075 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32077 node = cgraph_get_node (fn);
32078 gcc_assert (node != NULL);
32080 node_v = get_cgraph_node_version (node);
32081 gcc_assert (node_v != NULL);
32083 if (node_v->dispatcher_resolver != NULL)
32084 return node_v->dispatcher_resolver;
32086 /* Find the default version and make it the first node. */
32087 first_v = node_v;
32088 /* Go to the beginning of the chain. */
32089 while (first_v->prev != NULL)
32090 first_v = first_v->prev;
32091 default_version_info = first_v;
32092 while (default_version_info != NULL)
32094 if (is_function_default_version
32095 (default_version_info->this_node->decl))
32096 break;
32097 default_version_info = default_version_info->next;
32100 /* If there is no default node, just return NULL. */
32101 if (default_version_info == NULL)
32102 return NULL;
32104 /* Make default info the first node. */
32105 if (first_v != default_version_info)
32107 default_version_info->prev->next = default_version_info->next;
32108 if (default_version_info->next)
32109 default_version_info->next->prev = default_version_info->prev;
32110 first_v->prev = default_version_info;
32111 default_version_info->next = first_v;
32112 default_version_info->prev = NULL;
32115 default_node = default_version_info->this_node;
32117 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32118 if (targetm.has_ifunc_p ())
32120 struct cgraph_function_version_info *it_v = NULL;
32121 struct cgraph_node *dispatcher_node = NULL;
32122 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32124 /* Right now, the dispatching is done via ifunc. */
32125 dispatch_decl = make_dispatcher_decl (default_node->decl);
32127 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32128 gcc_assert (dispatcher_node != NULL);
32129 dispatcher_node->dispatcher_function = 1;
32130 dispatcher_version_info
32131 = insert_new_cgraph_node_version (dispatcher_node);
32132 dispatcher_version_info->next = default_version_info;
32133 dispatcher_node->definition = 1;
32135 /* Set the dispatcher for all the versions. */
32136 it_v = default_version_info;
32137 while (it_v != NULL)
32139 it_v->dispatcher_resolver = dispatch_decl;
32140 it_v = it_v->next;
32143 else
32144 #endif
32146 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32147 "multiversioning needs ifunc which is not supported "
32148 "on this target");
32151 return dispatch_decl;
32154 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32155 it to CHAIN. */
32157 static tree
32158 make_attribute (const char *name, const char *arg_name, tree chain)
32160 tree attr_name;
32161 tree attr_arg_name;
32162 tree attr_args;
32163 tree attr;
32165 attr_name = get_identifier (name);
32166 attr_arg_name = build_string (strlen (arg_name), arg_name);
32167 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32168 attr = tree_cons (attr_name, attr_args, chain);
32169 return attr;
32172 /* Make the resolver function decl to dispatch the versions of
32173 a multi-versioned function, DEFAULT_DECL. Create an
32174 empty basic block in the resolver and store the pointer in
32175 EMPTY_BB. Return the decl of the resolver function. */
32177 static tree
32178 make_resolver_func (const tree default_decl,
32179 const tree dispatch_decl,
32180 basic_block *empty_bb)
32182 char *resolver_name;
32183 tree decl, type, decl_name, t;
32184 bool is_uniq = false;
32186 /* IFUNC's have to be globally visible. So, if the default_decl is
32187 not, then the name of the IFUNC should be made unique. */
32188 if (TREE_PUBLIC (default_decl) == 0)
32189 is_uniq = true;
32191 /* Append the filename to the resolver function if the versions are
32192 not externally visible. This is because the resolver function has
32193 to be externally visible for the loader to find it. So, appending
32194 the filename will prevent conflicts with a resolver function from
32195 another module which is based on the same version name. */
32196 resolver_name = make_name (default_decl, "resolver", is_uniq);
32198 /* The resolver function should return a (void *). */
32199 type = build_function_type_list (ptr_type_node, NULL_TREE);
32201 decl = build_fn_decl (resolver_name, type);
32202 decl_name = get_identifier (resolver_name);
32203 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32205 DECL_NAME (decl) = decl_name;
32206 TREE_USED (decl) = 1;
32207 DECL_ARTIFICIAL (decl) = 1;
32208 DECL_IGNORED_P (decl) = 0;
32209 /* IFUNC resolvers have to be externally visible. */
32210 TREE_PUBLIC (decl) = 1;
32211 DECL_UNINLINABLE (decl) = 1;
32213 /* Resolver is not external, body is generated. */
32214 DECL_EXTERNAL (decl) = 0;
32215 DECL_EXTERNAL (dispatch_decl) = 0;
32217 DECL_CONTEXT (decl) = NULL_TREE;
32218 DECL_INITIAL (decl) = make_node (BLOCK);
32219 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32221 if (DECL_COMDAT_GROUP (default_decl)
32222 || TREE_PUBLIC (default_decl))
32224 /* In this case, each translation unit with a call to this
32225 versioned function will put out a resolver. Ensure it
32226 is comdat to keep just one copy. */
32227 DECL_COMDAT (decl) = 1;
32228 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32230 /* Build result decl and add to function_decl. */
32231 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32232 DECL_ARTIFICIAL (t) = 1;
32233 DECL_IGNORED_P (t) = 1;
32234 DECL_RESULT (decl) = t;
32236 gimplify_function_tree (decl);
32237 push_cfun (DECL_STRUCT_FUNCTION (decl));
32238 *empty_bb = init_lowered_empty_function (decl, false);
32240 cgraph_add_new_function (decl, true);
32241 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32243 pop_cfun ();
32245 gcc_assert (dispatch_decl != NULL);
32246 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32247 DECL_ATTRIBUTES (dispatch_decl)
32248 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32250 /* Create the alias for dispatch to resolver here. */
32251 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32252 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32253 XDELETEVEC (resolver_name);
32254 return decl;
32257 /* Generate the dispatching code body to dispatch multi-versioned function
32258 DECL. The target hook is called to process the "target" attributes and
32259 provide the code to dispatch the right function at run-time. NODE points
32260 to the dispatcher decl whose body will be created. */
32262 static tree
32263 ix86_generate_version_dispatcher_body (void *node_p)
32265 tree resolver_decl;
32266 basic_block empty_bb;
32267 tree default_ver_decl;
32268 struct cgraph_node *versn;
32269 struct cgraph_node *node;
32271 struct cgraph_function_version_info *node_version_info = NULL;
32272 struct cgraph_function_version_info *versn_info = NULL;
32274 node = (cgraph_node *)node_p;
32276 node_version_info = get_cgraph_node_version (node);
32277 gcc_assert (node->dispatcher_function
32278 && node_version_info != NULL);
32280 if (node_version_info->dispatcher_resolver)
32281 return node_version_info->dispatcher_resolver;
32283 /* The first version in the chain corresponds to the default version. */
32284 default_ver_decl = node_version_info->next->this_node->decl;
32286 /* node is going to be an alias, so remove the finalized bit. */
32287 node->definition = false;
32289 resolver_decl = make_resolver_func (default_ver_decl,
32290 node->decl, &empty_bb);
32292 node_version_info->dispatcher_resolver = resolver_decl;
32294 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32296 auto_vec<tree, 2> fn_ver_vec;
32298 for (versn_info = node_version_info->next; versn_info;
32299 versn_info = versn_info->next)
32301 versn = versn_info->this_node;
32302 /* Check for virtual functions here again, as by this time it should
32303 have been determined if this function needs a vtable index or
32304 not. This happens for methods in derived classes that override
32305 virtual methods in base classes but are not explicitly marked as
32306 virtual. */
32307 if (DECL_VINDEX (versn->decl))
32308 sorry ("Virtual function multiversioning not supported");
32310 fn_ver_vec.safe_push (versn->decl);
32313 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32314 rebuild_cgraph_edges ();
32315 pop_cfun ();
32316 return resolver_decl;
32318 /* This builds the processor_model struct type defined in
32319 libgcc/config/i386/cpuinfo.c */
32321 static tree
32322 build_processor_model_struct (void)
32324 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32325 "__cpu_features"};
32326 tree field = NULL_TREE, field_chain = NULL_TREE;
32327 int i;
32328 tree type = make_node (RECORD_TYPE);
32330 /* The first 3 fields are unsigned int. */
32331 for (i = 0; i < 3; ++i)
32333 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32334 get_identifier (field_name[i]), unsigned_type_node);
32335 if (field_chain != NULL_TREE)
32336 DECL_CHAIN (field) = field_chain;
32337 field_chain = field;
32340 /* The last field is an array of unsigned integers of size one. */
32341 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32342 get_identifier (field_name[3]),
32343 build_array_type (unsigned_type_node,
32344 build_index_type (size_one_node)));
32345 if (field_chain != NULL_TREE)
32346 DECL_CHAIN (field) = field_chain;
32347 field_chain = field;
32349 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32350 return type;
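/* For reference, the record built here is laid out to match the variable
   that libgcc/config/i386/cpuinfo.c defines and that __builtin_cpu_is and
   __builtin_cpu_supports read at run time:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;

   The feature bits live in the single element of __cpu_features.  */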
32353 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32355 static tree
32356 make_var_decl (tree type, const char *name)
32358 tree new_decl;
32360 new_decl = build_decl (UNKNOWN_LOCATION,
32361 VAR_DECL,
32362 get_identifier (name),
32363 type);
32365 DECL_EXTERNAL (new_decl) = 1;
32366 TREE_STATIC (new_decl) = 1;
32367 TREE_PUBLIC (new_decl) = 1;
32368 DECL_INITIAL (new_decl) = 0;
32369 DECL_ARTIFICIAL (new_decl) = 0;
32370 DECL_PRESERVE_P (new_decl) = 1;
32372 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32373 assemble_variable (new_decl, 0, 0, 0);
32375 return new_decl;
32378 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32379 into an integer defined in libgcc/config/i386/cpuinfo.c */
32381 static tree
32382 fold_builtin_cpu (tree fndecl, tree *args)
32384 unsigned int i;
32385 enum ix86_builtins fn_code = (enum ix86_builtins)
32386 DECL_FUNCTION_CODE (fndecl);
32387 tree param_string_cst = NULL;
32389 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32390 enum processor_features
32392 F_CMOV = 0,
32393 F_MMX,
32394 F_POPCNT,
32395 F_SSE,
32396 F_SSE2,
32397 F_SSE3,
32398 F_SSSE3,
32399 F_SSE4_1,
32400 F_SSE4_2,
32401 F_AVX,
32402 F_AVX2,
32403 F_SSE4_A,
32404 F_FMA4,
32405 F_XOP,
32406 F_FMA,
32407 F_MAX
32410 /* These are the values for vendor types and cpu types and subtypes
32411 in cpuinfo.c. Cpu types and subtypes have the corresponding start
32412 value subtracted before being compared against __cpu_model. */
32413 enum processor_model
32415 M_INTEL = 1,
32416 M_AMD,
32417 M_CPU_TYPE_START,
32418 M_INTEL_BONNELL,
32419 M_INTEL_CORE2,
32420 M_INTEL_COREI7,
32421 M_AMDFAM10H,
32422 M_AMDFAM15H,
32423 M_INTEL_SILVERMONT,
32424 M_AMD_BTVER1,
32425 M_AMD_BTVER2,
32426 M_CPU_SUBTYPE_START,
32427 M_INTEL_COREI7_NEHALEM,
32428 M_INTEL_COREI7_WESTMERE,
32429 M_INTEL_COREI7_SANDYBRIDGE,
32430 M_AMDFAM10H_BARCELONA,
32431 M_AMDFAM10H_SHANGHAI,
32432 M_AMDFAM10H_ISTANBUL,
32433 M_AMDFAM15H_BDVER1,
32434 M_AMDFAM15H_BDVER2,
32435 M_AMDFAM15H_BDVER3,
32436 M_AMDFAM15H_BDVER4,
32437 M_INTEL_COREI7_IVYBRIDGE,
32438 M_INTEL_COREI7_HASWELL,
32439 M_INTEL_COREI7_BROADWELL
32442 static struct _arch_names_table
32444 const char *const name;
32445 const enum processor_model model;
32447 const arch_names_table[] =
32449 {"amd", M_AMD},
32450 {"intel", M_INTEL},
32451 {"atom", M_INTEL_BONNELL},
32452 {"slm", M_INTEL_SILVERMONT},
32453 {"core2", M_INTEL_CORE2},
32454 {"corei7", M_INTEL_COREI7},
32455 {"nehalem", M_INTEL_COREI7_NEHALEM},
32456 {"westmere", M_INTEL_COREI7_WESTMERE},
32457 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32458 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32459 {"haswell", M_INTEL_COREI7_HASWELL},
32460 {"broadwell", M_INTEL_COREI7_BROADWELL},
32461 {"bonnell", M_INTEL_BONNELL},
32462 {"silvermont", M_INTEL_SILVERMONT},
32463 {"amdfam10h", M_AMDFAM10H},
32464 {"barcelona", M_AMDFAM10H_BARCELONA},
32465 {"shanghai", M_AMDFAM10H_SHANGHAI},
32466 {"istanbul", M_AMDFAM10H_ISTANBUL},
32467 {"btver1", M_AMD_BTVER1},
32468 {"amdfam15h", M_AMDFAM15H},
32469 {"bdver1", M_AMDFAM15H_BDVER1},
32470 {"bdver2", M_AMDFAM15H_BDVER2},
32471 {"bdver3", M_AMDFAM15H_BDVER3},
32472 {"bdver4", M_AMDFAM15H_BDVER4},
32473 {"btver2", M_AMD_BTVER2},
32476 static struct _isa_names_table
32478 const char *const name;
32479 const enum processor_features feature;
32481 const isa_names_table[] =
32483 {"cmov", F_CMOV},
32484 {"mmx", F_MMX},
32485 {"popcnt", F_POPCNT},
32486 {"sse", F_SSE},
32487 {"sse2", F_SSE2},
32488 {"sse3", F_SSE3},
32489 {"ssse3", F_SSSE3},
32490 {"sse4a", F_SSE4_A},
32491 {"sse4.1", F_SSE4_1},
32492 {"sse4.2", F_SSE4_2},
32493 {"avx", F_AVX},
32494 {"fma4", F_FMA4},
32495 {"xop", F_XOP},
32496 {"fma", F_FMA},
32497 {"avx2", F_AVX2}
32500 tree __processor_model_type = build_processor_model_struct ();
32501 tree __cpu_model_var = make_var_decl (__processor_model_type,
32502 "__cpu_model");
32505 varpool_add_new_variable (__cpu_model_var);
32507 gcc_assert ((args != NULL) && (*args != NULL));
32509 param_string_cst = *args;
32510 while (param_string_cst
32511 && TREE_CODE (param_string_cst) != STRING_CST)
32513 /* *args must be an expr that can contain other EXPRs leading to a
32514 STRING_CST. */
32515 if (!EXPR_P (param_string_cst))
32517 error ("Parameter to builtin must be a string constant or literal");
32518 return integer_zero_node;
32520 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32523 gcc_assert (param_string_cst);
32525 if (fn_code == IX86_BUILTIN_CPU_IS)
32527 tree ref;
32528 tree field;
32529 tree final;
32531 unsigned int field_val = 0;
32532 unsigned int NUM_ARCH_NAMES
32533 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32535 for (i = 0; i < NUM_ARCH_NAMES; i++)
32536 if (strcmp (arch_names_table[i].name,
32537 TREE_STRING_POINTER (param_string_cst)) == 0)
32538 break;
32540 if (i == NUM_ARCH_NAMES)
32542 error ("Parameter to builtin not valid: %s",
32543 TREE_STRING_POINTER (param_string_cst));
32544 return integer_zero_node;
32547 field = TYPE_FIELDS (__processor_model_type);
32548 field_val = arch_names_table[i].model;
32550 /* CPU types are stored in the next field. */
32551 if (field_val > M_CPU_TYPE_START
32552 && field_val < M_CPU_SUBTYPE_START)
32554 field = DECL_CHAIN (field);
32555 field_val -= M_CPU_TYPE_START;
32558 /* CPU subtypes are stored in the next field. */
32559 if (field_val > M_CPU_SUBTYPE_START)
32561 field = DECL_CHAIN (DECL_CHAIN (field));
32562 field_val -= M_CPU_SUBTYPE_START;
32565 /* Get the appropriate field in __cpu_model. */
32566 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32567 field, NULL_TREE);
32569 /* Check the value. */
32570 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32571 build_int_cstu (unsigned_type_node, field_val));
32572 return build1 (CONVERT_EXPR, integer_type_node, final);
32574 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32576 tree ref;
32577 tree array_elt;
32578 tree field;
32579 tree final;
32581 unsigned int field_val = 0;
32582 unsigned int NUM_ISA_NAMES
32583 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32585 for (i = 0; i < NUM_ISA_NAMES; i++)
32586 if (strcmp (isa_names_table[i].name,
32587 TREE_STRING_POINTER (param_string_cst)) == 0)
32588 break;
32590 if (i == NUM_ISA_NAMES)
32592 error ("Parameter to builtin not valid: %s",
32593 TREE_STRING_POINTER (param_string_cst));
32594 return integer_zero_node;
32597 field = TYPE_FIELDS (__processor_model_type);
32598 /* Get the last field, which is __cpu_features. */
32599 while (DECL_CHAIN (field))
32600 field = DECL_CHAIN (field);
32602 /* Get the appropriate field: __cpu_model.__cpu_features */
32603 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32604 field, NULL_TREE);
32606 /* Access the 0th element of __cpu_features array. */
32607 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32608 integer_zero_node, NULL_TREE, NULL_TREE);
32610 field_val = (1 << isa_names_table[i].feature);
32611 /* Return __cpu_model.__cpu_features[0] & field_val */
32612 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32613 build_int_cstu (unsigned_type_node, field_val));
32614 return build1 (CONVERT_EXPR, integer_type_node, final);
32616 gcc_unreachable ();
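/* An illustrative sketch of the folding done above (shown with the
   enumerators of this function; the C expressions are only for exposition):

     __builtin_cpu_is ("westmere")

   becomes a test of the subtype field of the libgcc variable, roughly

     (int) (__cpu_model.__cpu_subtype
            == M_INTEL_COREI7_WESTMERE - M_CPU_SUBTYPE_START)

   while

     __builtin_cpu_supports ("avx2")

   becomes a bit test of the feature word:

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   Both therefore fold into plain loads of data that __cpu_indicator_init
   in libgcc fills in at program start-up.  */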
32619 static tree
32620 ix86_fold_builtin (tree fndecl, int n_args,
32621 tree *args, bool ignore ATTRIBUTE_UNUSED)
32623 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32625 enum ix86_builtins fn_code = (enum ix86_builtins)
32626 DECL_FUNCTION_CODE (fndecl);
32627 if (fn_code == IX86_BUILTIN_CPU_IS
32628 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32630 gcc_assert (n_args == 1);
32631 return fold_builtin_cpu (fndecl, args);
32635 #ifdef SUBTARGET_FOLD_BUILTIN
32636 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32637 #endif
32639 return NULL_TREE;
32642 /* Make builtins to detect cpu type and features supported. NAME is
32643 the builtin name, CODE is the builtin code, and FTYPE is the function
32644 type of the builtin. */
32646 static void
32647 make_cpu_type_builtin (const char* name, int code,
32648 enum ix86_builtin_func_type ftype, bool is_const)
32650 tree decl;
32651 tree type;
32653 type = ix86_get_builtin_func_type (ftype);
32654 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32655 NULL, NULL_TREE);
32656 gcc_assert (decl != NULL_TREE);
32657 ix86_builtins[(int) code] = decl;
32658 TREE_READONLY (decl) = is_const;
32661 /* Make builtins to get CPU type and features supported. The created
32662 builtins are :
32664 __builtin_cpu_init (), to detect cpu type and features,
32665 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32666 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32669 static void
32670 ix86_init_platform_type_builtins (void)
32672 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32673 INT_FTYPE_VOID, false);
32674 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32675 INT_FTYPE_PCCHAR, true);
32676 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32677 INT_FTYPE_PCCHAR, true);
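/* A minimal usage sketch of the three builtins registered above:

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("sse4.2"))
         return 2;
       return __builtin_cpu_supports ("popcnt") ? 1 : 0;
     }

   Calling __builtin_cpu_init explicitly is only required in code that may
   run before constructors (for example ifunc resolvers); in ordinary code
   libgcc has already initialized __cpu_model by the time main starts.  */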
32680 /* Internal method for ix86_init_builtins. */
32682 static void
32683 ix86_init_builtins_va_builtins_abi (void)
32685 tree ms_va_ref, sysv_va_ref;
32686 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32687 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32688 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32689 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32691 if (!TARGET_64BIT)
32692 return;
32693 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32694 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32695 ms_va_ref = build_reference_type (ms_va_list_type_node);
32696 sysv_va_ref =
32697 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32699 fnvoid_va_end_ms =
32700 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32701 fnvoid_va_start_ms =
32702 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32703 fnvoid_va_end_sysv =
32704 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32705 fnvoid_va_start_sysv =
32706 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32707 NULL_TREE);
32708 fnvoid_va_copy_ms =
32709 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32710 NULL_TREE);
32711 fnvoid_va_copy_sysv =
32712 build_function_type_list (void_type_node, sysv_va_ref,
32713 sysv_va_ref, NULL_TREE);
32715 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32716 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32717 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32718 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32719 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32720 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32721 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32722 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32723 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32724 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32725 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32726 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
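/* A rough usage sketch of the builtins registered above (x86-64 only, as
   guarded by the TARGET_64BIT check):

     __attribute__ ((ms_abi)) int
     first_int (int count, ...)
     {
       __builtin_ms_va_list ap;
       int v;

       __builtin_ms_va_start (ap, count);
       v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }

   The __builtin_sysv_va_* forms play the same role for sysv_abi functions
   when the default calling convention is the MS one.  */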
32729 static void
32730 ix86_init_builtin_types (void)
32732 tree float128_type_node, float80_type_node;
32734 /* The __float80 type. */
32735 float80_type_node = long_double_type_node;
32736 if (TYPE_MODE (float80_type_node) != XFmode)
32738 /* long double is not 80-bit here, so build a distinct 80-bit type. */
32739 float80_type_node = make_node (REAL_TYPE);
32741 TYPE_PRECISION (float80_type_node) = 80;
32742 layout_type (float80_type_node);
32744 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32746 /* The __float128 type. */
32747 float128_type_node = make_node (REAL_TYPE);
32748 TYPE_PRECISION (float128_type_node) = 128;
32749 layout_type (float128_type_node);
32750 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32752 /* This macro is built by i386-builtin-types.awk. */
32753 DEFINE_BUILTIN_PRIMITIVE_TYPES;
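/* Usage sketch for the two types registered above:

     __float80  ext  = 1.0w;    (x87 extended precision, XFmode)
     __float128 quad = 1.0q;    (IEEE quadruple precision, TFmode)

   where the w and q constant suffixes select the respective types.  On
   targets whose long double is already XFmode, __float80 is simply another
   name for long double, as the code above shows.  */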
32756 static void
32757 ix86_init_builtins (void)
32759 tree t;
32761 ix86_init_builtin_types ();
32763 /* Builtins to get CPU type and features. */
32764 ix86_init_platform_type_builtins ();
32766 /* TFmode support builtins. */
32767 def_builtin_const (0, "__builtin_infq",
32768 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32769 def_builtin_const (0, "__builtin_huge_valq",
32770 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32772 /* We will expand them to a normal call if SSE isn't available, since
32773 they are used by libgcc. */
32774 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32775 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32776 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32777 TREE_READONLY (t) = 1;
32778 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32780 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32781 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32782 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32783 TREE_READONLY (t) = 1;
32784 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32786 ix86_init_tm_builtins ();
32787 ix86_init_mmx_sse_builtins ();
32789 if (TARGET_LP64)
32790 ix86_init_builtins_va_builtins_abi ();
32792 #ifdef SUBTARGET_INIT_BUILTINS
32793 SUBTARGET_INIT_BUILTINS;
32794 #endif
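/* Usage sketch for the TFmode builtins declared above:

     __float128 inf  = __builtin_infq ();
     __float128 huge = __builtin_huge_valq ();
     __float128 mag  = __builtin_fabsq (-2.0q);
     __float128 neg  = __builtin_copysignq (3.0q, -1.0q);

   When SSE is unavailable the last two expand to calls to the libgcc
   routines __fabstf2 and __copysigntf3 named above.  */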
32797 /* Return the ix86 builtin for CODE. */
32799 static tree
32800 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32802 if (code >= IX86_BUILTIN_MAX)
32803 return error_mark_node;
32805 return ix86_builtins[code];
32808 /* Errors in the source file can cause expand_expr to return const0_rtx
32809 where we expect a vector. To avoid crashing, use one of the vector
32810 clear instructions. */
32811 static rtx
32812 safe_vector_operand (rtx x, enum machine_mode mode)
32814 if (x == const0_rtx)
32815 x = CONST0_RTX (mode);
32816 return x;
32819 /* Fix up modeless constants to fit the required mode. */
32820 static rtx
32821 fixup_modeless_constant (rtx x, machine_mode mode)
32823 if (GET_MODE (x) == VOIDmode)
32824 x = convert_to_mode (mode, x, 1);
32825 return x;
32828 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32830 static rtx
32831 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32833 rtx pat;
32834 tree arg0 = CALL_EXPR_ARG (exp, 0);
32835 tree arg1 = CALL_EXPR_ARG (exp, 1);
32836 rtx op0 = expand_normal (arg0);
32837 rtx op1 = expand_normal (arg1);
32838 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32839 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32840 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32842 if (VECTOR_MODE_P (mode0))
32843 op0 = safe_vector_operand (op0, mode0);
32844 if (VECTOR_MODE_P (mode1))
32845 op1 = safe_vector_operand (op1, mode1);
32847 if (optimize || !target
32848 || GET_MODE (target) != tmode
32849 || !insn_data[icode].operand[0].predicate (target, tmode))
32850 target = gen_reg_rtx (tmode);
32852 if (GET_MODE (op1) == SImode && mode1 == TImode)
32854 rtx x = gen_reg_rtx (V4SImode);
32855 emit_insn (gen_sse2_loadd (x, op1));
32856 op1 = gen_lowpart (TImode, x);
32859 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32860 op0 = copy_to_mode_reg (mode0, op0);
32861 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32862 op1 = copy_to_mode_reg (mode1, op1);
32864 pat = GEN_FCN (icode) (target, op0, op1);
32865 if (! pat)
32866 return 0;
32868 emit_insn (pat);
32870 return target;
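/* A caller-side sketch of a binop builtin that reaches this routine
   (assumes -msse2; the vector typedef is only for the example):

     typedef double v2df __attribute__ ((vector_size (16)));

     v2df
     add2 (v2df a, v2df b)
     {
       return __builtin_ia32_addpd (a, b);
     }

   The builtin takes two vector operands and produces one vector result,
   so it is expanded here through the addv2df3 insn pattern.  */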
32873 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32875 static rtx
32876 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32877 enum ix86_builtin_func_type m_type,
32878 enum rtx_code sub_code)
32880 rtx pat;
32881 int i;
32882 int nargs;
32883 bool comparison_p = false;
32884 bool tf_p = false;
32885 bool last_arg_constant = false;
32886 int num_memory = 0;
32887 struct {
32888 rtx op;
32889 enum machine_mode mode;
32890 } args[4];
32892 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32894 switch (m_type)
32896 case MULTI_ARG_4_DF2_DI_I:
32897 case MULTI_ARG_4_DF2_DI_I1:
32898 case MULTI_ARG_4_SF2_SI_I:
32899 case MULTI_ARG_4_SF2_SI_I1:
32900 nargs = 4;
32901 last_arg_constant = true;
32902 break;
32904 case MULTI_ARG_3_SF:
32905 case MULTI_ARG_3_DF:
32906 case MULTI_ARG_3_SF2:
32907 case MULTI_ARG_3_DF2:
32908 case MULTI_ARG_3_DI:
32909 case MULTI_ARG_3_SI:
32910 case MULTI_ARG_3_SI_DI:
32911 case MULTI_ARG_3_HI:
32912 case MULTI_ARG_3_HI_SI:
32913 case MULTI_ARG_3_QI:
32914 case MULTI_ARG_3_DI2:
32915 case MULTI_ARG_3_SI2:
32916 case MULTI_ARG_3_HI2:
32917 case MULTI_ARG_3_QI2:
32918 nargs = 3;
32919 break;
32921 case MULTI_ARG_2_SF:
32922 case MULTI_ARG_2_DF:
32923 case MULTI_ARG_2_DI:
32924 case MULTI_ARG_2_SI:
32925 case MULTI_ARG_2_HI:
32926 case MULTI_ARG_2_QI:
32927 nargs = 2;
32928 break;
32930 case MULTI_ARG_2_DI_IMM:
32931 case MULTI_ARG_2_SI_IMM:
32932 case MULTI_ARG_2_HI_IMM:
32933 case MULTI_ARG_2_QI_IMM:
32934 nargs = 2;
32935 last_arg_constant = true;
32936 break;
32938 case MULTI_ARG_1_SF:
32939 case MULTI_ARG_1_DF:
32940 case MULTI_ARG_1_SF2:
32941 case MULTI_ARG_1_DF2:
32942 case MULTI_ARG_1_DI:
32943 case MULTI_ARG_1_SI:
32944 case MULTI_ARG_1_HI:
32945 case MULTI_ARG_1_QI:
32946 case MULTI_ARG_1_SI_DI:
32947 case MULTI_ARG_1_HI_DI:
32948 case MULTI_ARG_1_HI_SI:
32949 case MULTI_ARG_1_QI_DI:
32950 case MULTI_ARG_1_QI_SI:
32951 case MULTI_ARG_1_QI_HI:
32952 nargs = 1;
32953 break;
32955 case MULTI_ARG_2_DI_CMP:
32956 case MULTI_ARG_2_SI_CMP:
32957 case MULTI_ARG_2_HI_CMP:
32958 case MULTI_ARG_2_QI_CMP:
32959 nargs = 2;
32960 comparison_p = true;
32961 break;
32963 case MULTI_ARG_2_SF_TF:
32964 case MULTI_ARG_2_DF_TF:
32965 case MULTI_ARG_2_DI_TF:
32966 case MULTI_ARG_2_SI_TF:
32967 case MULTI_ARG_2_HI_TF:
32968 case MULTI_ARG_2_QI_TF:
32969 nargs = 2;
32970 tf_p = true;
32971 break;
32973 default:
32974 gcc_unreachable ();
32977 if (optimize || !target
32978 || GET_MODE (target) != tmode
32979 || !insn_data[icode].operand[0].predicate (target, tmode))
32980 target = gen_reg_rtx (tmode);
32982 gcc_assert (nargs <= 4);
32984 for (i = 0; i < nargs; i++)
32986 tree arg = CALL_EXPR_ARG (exp, i);
32987 rtx op = expand_normal (arg);
32988 int adjust = (comparison_p) ? 1 : 0;
32989 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32991 if (last_arg_constant && i == nargs - 1)
32993 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32995 enum insn_code new_icode = icode;
32996 switch (icode)
32998 case CODE_FOR_xop_vpermil2v2df3:
32999 case CODE_FOR_xop_vpermil2v4sf3:
33000 case CODE_FOR_xop_vpermil2v4df3:
33001 case CODE_FOR_xop_vpermil2v8sf3:
33002 error ("the last argument must be a 2-bit immediate");
33003 return gen_reg_rtx (tmode);
33004 case CODE_FOR_xop_rotlv2di3:
33005 new_icode = CODE_FOR_rotlv2di3;
33006 goto xop_rotl;
33007 case CODE_FOR_xop_rotlv4si3:
33008 new_icode = CODE_FOR_rotlv4si3;
33009 goto xop_rotl;
33010 case CODE_FOR_xop_rotlv8hi3:
33011 new_icode = CODE_FOR_rotlv8hi3;
33012 goto xop_rotl;
33013 case CODE_FOR_xop_rotlv16qi3:
33014 new_icode = CODE_FOR_rotlv16qi3;
33015 xop_rotl:
33016 if (CONST_INT_P (op))
33018 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33019 op = GEN_INT (INTVAL (op) & mask);
33020 gcc_checking_assert
33021 (insn_data[icode].operand[i + 1].predicate (op, mode));
33023 else
33025 gcc_checking_assert
33026 (nargs == 2
33027 && insn_data[new_icode].operand[0].mode == tmode
33028 && insn_data[new_icode].operand[1].mode == tmode
33029 && insn_data[new_icode].operand[2].mode == mode
33030 && insn_data[new_icode].operand[0].predicate
33031 == insn_data[icode].operand[0].predicate
33032 && insn_data[new_icode].operand[1].predicate
33033 == insn_data[icode].operand[1].predicate);
33034 icode = new_icode;
33035 goto non_constant;
33037 break;
33038 default:
33039 gcc_unreachable ();
33043 else
33045 non_constant:
33046 if (VECTOR_MODE_P (mode))
33047 op = safe_vector_operand (op, mode);
33049 /* If we aren't optimizing, only allow one memory operand to be
33050 generated. */
33051 if (memory_operand (op, mode))
33052 num_memory++;
33054 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33056 if (optimize
33057 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33058 || num_memory > 1)
33059 op = force_reg (mode, op);
33062 args[i].op = op;
33063 args[i].mode = mode;
33066 switch (nargs)
33068 case 1:
33069 pat = GEN_FCN (icode) (target, args[0].op);
33070 break;
33072 case 2:
33073 if (tf_p)
33074 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33075 GEN_INT ((int)sub_code));
33076 else if (! comparison_p)
33077 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33078 else
33080 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33081 args[0].op,
33082 args[1].op);
33084 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33086 break;
33088 case 3:
33089 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33090 break;
33092 case 4:
33093 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33094 break;
33096 default:
33097 gcc_unreachable ();
33100 if (! pat)
33101 return 0;
33103 emit_insn (pat);
33104 return target;
33107 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33108 insns with vec_merge. */
33110 static rtx
33111 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33112 rtx target)
33114 rtx pat;
33115 tree arg0 = CALL_EXPR_ARG (exp, 0);
33116 rtx op1, op0 = expand_normal (arg0);
33117 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33118 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33120 if (optimize || !target
33121 || GET_MODE (target) != tmode
33122 || !insn_data[icode].operand[0].predicate (target, tmode))
33123 target = gen_reg_rtx (tmode);
33125 if (VECTOR_MODE_P (mode0))
33126 op0 = safe_vector_operand (op0, mode0);
33128 if ((optimize && !register_operand (op0, mode0))
33129 || !insn_data[icode].operand[1].predicate (op0, mode0))
33130 op0 = copy_to_mode_reg (mode0, op0);
33132 op1 = op0;
33133 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33134 op1 = copy_to_mode_reg (mode0, op1);
33136 pat = GEN_FCN (icode) (target, op0, op1);
33137 if (! pat)
33138 return 0;
33139 emit_insn (pat);
33140 return target;
33143 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33145 static rtx
33146 ix86_expand_sse_compare (const struct builtin_description *d,
33147 tree exp, rtx target, bool swap)
33149 rtx pat;
33150 tree arg0 = CALL_EXPR_ARG (exp, 0);
33151 tree arg1 = CALL_EXPR_ARG (exp, 1);
33152 rtx op0 = expand_normal (arg0);
33153 rtx op1 = expand_normal (arg1);
33154 rtx op2;
33155 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33156 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33157 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33158 enum rtx_code comparison = d->comparison;
33160 if (VECTOR_MODE_P (mode0))
33161 op0 = safe_vector_operand (op0, mode0);
33162 if (VECTOR_MODE_P (mode1))
33163 op1 = safe_vector_operand (op1, mode1);
33165 /* Swap operands if we have a comparison that isn't available in
33166 hardware. */
33167 if (swap)
33169 rtx tmp = gen_reg_rtx (mode1);
33170 emit_move_insn (tmp, op1);
33171 op1 = op0;
33172 op0 = tmp;
33175 if (optimize || !target
33176 || GET_MODE (target) != tmode
33177 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33178 target = gen_reg_rtx (tmode);
33180 if ((optimize && !register_operand (op0, mode0))
33181 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33182 op0 = copy_to_mode_reg (mode0, op0);
33183 if ((optimize && !register_operand (op1, mode1))
33184 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33185 op1 = copy_to_mode_reg (mode1, op1);
33187 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33188 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33189 if (! pat)
33190 return 0;
33191 emit_insn (pat);
33192 return target;
33195 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33197 static rtx
33198 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33199 rtx target)
33201 rtx pat;
33202 tree arg0 = CALL_EXPR_ARG (exp, 0);
33203 tree arg1 = CALL_EXPR_ARG (exp, 1);
33204 rtx op0 = expand_normal (arg0);
33205 rtx op1 = expand_normal (arg1);
33206 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33207 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33208 enum rtx_code comparison = d->comparison;
33210 if (VECTOR_MODE_P (mode0))
33211 op0 = safe_vector_operand (op0, mode0);
33212 if (VECTOR_MODE_P (mode1))
33213 op1 = safe_vector_operand (op1, mode1);
33215 /* Swap operands if we have a comparison that isn't available in
33216 hardware. */
33217 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33219 rtx tmp = op1;
33220 op1 = op0;
33221 op0 = tmp;
33224 target = gen_reg_rtx (SImode);
33225 emit_move_insn (target, const0_rtx);
33226 target = gen_rtx_SUBREG (QImode, target, 0);
33228 if ((optimize && !register_operand (op0, mode0))
33229 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33230 op0 = copy_to_mode_reg (mode0, op0);
33231 if ((optimize && !register_operand (op1, mode1))
33232 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33233 op1 = copy_to_mode_reg (mode1, op1);
33235 pat = GEN_FCN (d->icode) (op0, op1);
33236 if (! pat)
33237 return 0;
33238 emit_insn (pat);
33239 emit_insn (gen_rtx_SET (VOIDmode,
33240 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33241 gen_rtx_fmt_ee (comparison, QImode,
33242 SET_DEST (pat),
33243 const0_rtx)));
33245 return SUBREG_REG (target);
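/* A caller-side sketch (assumes -msse2): the scalar ordered-compare
   intrinsics wrap comi builtins that are expanded here, e.g.

     #include <emmintrin.h>

     int
     low_equal (__m128d a, __m128d b)
     {
       return _mm_comieq_sd (a, b);
     }

   The 0/1 result is produced by testing the flags set by comisd and
   storing the condition into the low byte of a zeroed register, as the
   STRICT_LOW_PART store above shows.  */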
33248 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33250 static rtx
33251 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33252 rtx target)
33254 rtx pat;
33255 tree arg0 = CALL_EXPR_ARG (exp, 0);
33256 rtx op1, op0 = expand_normal (arg0);
33257 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33258 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33260 if (optimize || target == 0
33261 || GET_MODE (target) != tmode
33262 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33263 target = gen_reg_rtx (tmode);
33265 if (VECTOR_MODE_P (mode0))
33266 op0 = safe_vector_operand (op0, mode0);
33268 if ((optimize && !register_operand (op0, mode0))
33269 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33270 op0 = copy_to_mode_reg (mode0, op0);
33272 op1 = GEN_INT (d->comparison);
33274 pat = GEN_FCN (d->icode) (target, op0, op1);
33275 if (! pat)
33276 return 0;
33277 emit_insn (pat);
33278 return target;
33281 static rtx
33282 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33283 tree exp, rtx target)
33285 rtx pat;
33286 tree arg0 = CALL_EXPR_ARG (exp, 0);
33287 tree arg1 = CALL_EXPR_ARG (exp, 1);
33288 rtx op0 = expand_normal (arg0);
33289 rtx op1 = expand_normal (arg1);
33290 rtx op2;
33291 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33292 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33293 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33295 if (optimize || target == 0
33296 || GET_MODE (target) != tmode
33297 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33298 target = gen_reg_rtx (tmode);
33300 op0 = safe_vector_operand (op0, mode0);
33301 op1 = safe_vector_operand (op1, mode1);
33303 if ((optimize && !register_operand (op0, mode0))
33304 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33305 op0 = copy_to_mode_reg (mode0, op0);
33306 if ((optimize && !register_operand (op1, mode1))
33307 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33308 op1 = copy_to_mode_reg (mode1, op1);
33310 op2 = GEN_INT (d->comparison);
33312 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33313 if (! pat)
33314 return 0;
33315 emit_insn (pat);
33316 return target;
33319 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33321 static rtx
33322 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33323 rtx target)
33325 rtx pat;
33326 tree arg0 = CALL_EXPR_ARG (exp, 0);
33327 tree arg1 = CALL_EXPR_ARG (exp, 1);
33328 rtx op0 = expand_normal (arg0);
33329 rtx op1 = expand_normal (arg1);
33330 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33331 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33332 enum rtx_code comparison = d->comparison;
33334 if (VECTOR_MODE_P (mode0))
33335 op0 = safe_vector_operand (op0, mode0);
33336 if (VECTOR_MODE_P (mode1))
33337 op1 = safe_vector_operand (op1, mode1);
33339 target = gen_reg_rtx (SImode);
33340 emit_move_insn (target, const0_rtx);
33341 target = gen_rtx_SUBREG (QImode, target, 0);
33343 if ((optimize && !register_operand (op0, mode0))
33344 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33345 op0 = copy_to_mode_reg (mode0, op0);
33346 if ((optimize && !register_operand (op1, mode1))
33347 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33348 op1 = copy_to_mode_reg (mode1, op1);
33350 pat = GEN_FCN (d->icode) (op0, op1);
33351 if (! pat)
33352 return 0;
33353 emit_insn (pat);
33354 emit_insn (gen_rtx_SET (VOIDmode,
33355 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33356 gen_rtx_fmt_ee (comparison, QImode,
33357 SET_DEST (pat),
33358 const0_rtx)));
33360 return SUBREG_REG (target);
33363 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33365 static rtx
33366 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33367 tree exp, rtx target)
33369 rtx pat;
33370 tree arg0 = CALL_EXPR_ARG (exp, 0);
33371 tree arg1 = CALL_EXPR_ARG (exp, 1);
33372 tree arg2 = CALL_EXPR_ARG (exp, 2);
33373 tree arg3 = CALL_EXPR_ARG (exp, 3);
33374 tree arg4 = CALL_EXPR_ARG (exp, 4);
33375 rtx scratch0, scratch1;
33376 rtx op0 = expand_normal (arg0);
33377 rtx op1 = expand_normal (arg1);
33378 rtx op2 = expand_normal (arg2);
33379 rtx op3 = expand_normal (arg3);
33380 rtx op4 = expand_normal (arg4);
33381 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33383 tmode0 = insn_data[d->icode].operand[0].mode;
33384 tmode1 = insn_data[d->icode].operand[1].mode;
33385 modev2 = insn_data[d->icode].operand[2].mode;
33386 modei3 = insn_data[d->icode].operand[3].mode;
33387 modev4 = insn_data[d->icode].operand[4].mode;
33388 modei5 = insn_data[d->icode].operand[5].mode;
33389 modeimm = insn_data[d->icode].operand[6].mode;
33391 if (VECTOR_MODE_P (modev2))
33392 op0 = safe_vector_operand (op0, modev2);
33393 if (VECTOR_MODE_P (modev4))
33394 op2 = safe_vector_operand (op2, modev4);
33396 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33397 op0 = copy_to_mode_reg (modev2, op0);
33398 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33399 op1 = copy_to_mode_reg (modei3, op1);
33400 if ((optimize && !register_operand (op2, modev4))
33401 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33402 op2 = copy_to_mode_reg (modev4, op2);
33403 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33404 op3 = copy_to_mode_reg (modei5, op3);
33406 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33408 error ("the fifth argument must be an 8-bit immediate");
33409 return const0_rtx;
33412 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33414 if (optimize || !target
33415 || GET_MODE (target) != tmode0
33416 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33417 target = gen_reg_rtx (tmode0);
33419 scratch1 = gen_reg_rtx (tmode1);
33421 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33423 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33425 if (optimize || !target
33426 || GET_MODE (target) != tmode1
33427 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33428 target = gen_reg_rtx (tmode1);
33430 scratch0 = gen_reg_rtx (tmode0);
33432 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33434 else
33436 gcc_assert (d->flag);
33438 scratch0 = gen_reg_rtx (tmode0);
33439 scratch1 = gen_reg_rtx (tmode1);
33441 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33444 if (! pat)
33445 return 0;
33447 emit_insn (pat);
33449 if (d->flag)
33451 target = gen_reg_rtx (SImode);
33452 emit_move_insn (target, const0_rtx);
33453 target = gen_rtx_SUBREG (QImode, target, 0);
33455 emit_insn
33456 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33457 gen_rtx_fmt_ee (EQ, QImode,
33458 gen_rtx_REG ((enum machine_mode) d->flag,
33459 FLAGS_REG),
33460 const0_rtx)));
33461 return SUBREG_REG (target);
33463 else
33464 return target;
33468 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33470 static rtx
33471 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33472 tree exp, rtx target)
33474 rtx pat;
33475 tree arg0 = CALL_EXPR_ARG (exp, 0);
33476 tree arg1 = CALL_EXPR_ARG (exp, 1);
33477 tree arg2 = CALL_EXPR_ARG (exp, 2);
33478 rtx scratch0, scratch1;
33479 rtx op0 = expand_normal (arg0);
33480 rtx op1 = expand_normal (arg1);
33481 rtx op2 = expand_normal (arg2);
33482 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33484 tmode0 = insn_data[d->icode].operand[0].mode;
33485 tmode1 = insn_data[d->icode].operand[1].mode;
33486 modev2 = insn_data[d->icode].operand[2].mode;
33487 modev3 = insn_data[d->icode].operand[3].mode;
33488 modeimm = insn_data[d->icode].operand[4].mode;
33490 if (VECTOR_MODE_P (modev2))
33491 op0 = safe_vector_operand (op0, modev2);
33492 if (VECTOR_MODE_P (modev3))
33493 op1 = safe_vector_operand (op1, modev3);
33495 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33496 op0 = copy_to_mode_reg (modev2, op0);
33497 if ((optimize && !register_operand (op1, modev3))
33498 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33499 op1 = copy_to_mode_reg (modev3, op1);
33501 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33503 error ("the third argument must be an 8-bit immediate");
33504 return const0_rtx;
33507 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33509 if (optimize || !target
33510 || GET_MODE (target) != tmode0
33511 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33512 target = gen_reg_rtx (tmode0);
33514 scratch1 = gen_reg_rtx (tmode1);
33516 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33518 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33520 if (optimize || !target
33521 || GET_MODE (target) != tmode1
33522 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33523 target = gen_reg_rtx (tmode1);
33525 scratch0 = gen_reg_rtx (tmode0);
33527 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33529 else
33531 gcc_assert (d->flag);
33533 scratch0 = gen_reg_rtx (tmode0);
33534 scratch1 = gen_reg_rtx (tmode1);
33536 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33539 if (! pat)
33540 return 0;
33542 emit_insn (pat);
33544 if (d->flag)
33546 target = gen_reg_rtx (SImode);
33547 emit_move_insn (target, const0_rtx);
33548 target = gen_rtx_SUBREG (QImode, target, 0);
33550 emit_insn
33551 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33552 gen_rtx_fmt_ee (EQ, QImode,
33553 gen_rtx_REG ((enum machine_mode) d->flag,
33554 FLAGS_REG),
33555 const0_rtx)));
33556 return SUBREG_REG (target);
33558 else
33559 return target;
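/* A caller-side sketch (assumes -msse4.2): the implicit-length string
   compare intrinsics wrap these builtins, e.g. finding the first byte of
   DATA that occurs in SET:

     #include <nmmintrin.h>

     int
     first_match (__m128i set, __m128i data)
     {
       return _mm_cmpistri (set, data,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }

   The mode operand has to be an 8-bit compile-time constant, which is why
   the expander above rejects non-immediate third arguments.  */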
33562 /* Subroutine of ix86_expand_builtin to take care of insns with
33563 a variable number of operands. */
33565 static rtx
33566 ix86_expand_args_builtin (const struct builtin_description *d,
33567 tree exp, rtx target)
33569 rtx pat, real_target;
33570 unsigned int i, nargs;
33571 unsigned int nargs_constant = 0;
33572 unsigned int mask_pos = 0;
33573 int num_memory = 0;
33574 struct
33576 rtx op;
33577 enum machine_mode mode;
33578 } args[6];
33579 bool last_arg_count = false;
33580 enum insn_code icode = d->icode;
33581 const struct insn_data_d *insn_p = &insn_data[icode];
33582 enum machine_mode tmode = insn_p->operand[0].mode;
33583 enum machine_mode rmode = VOIDmode;
33584 bool swap = false;
33585 enum rtx_code comparison = d->comparison;
33587 switch ((enum ix86_builtin_func_type) d->flag)
33589 case V2DF_FTYPE_V2DF_ROUND:
33590 case V4DF_FTYPE_V4DF_ROUND:
33591 case V4SF_FTYPE_V4SF_ROUND:
33592 case V8SF_FTYPE_V8SF_ROUND:
33593 case V4SI_FTYPE_V4SF_ROUND:
33594 case V8SI_FTYPE_V8SF_ROUND:
33595 return ix86_expand_sse_round (d, exp, target);
33596 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33597 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33598 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33599 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33600 case INT_FTYPE_V8SF_V8SF_PTEST:
33601 case INT_FTYPE_V4DI_V4DI_PTEST:
33602 case INT_FTYPE_V4DF_V4DF_PTEST:
33603 case INT_FTYPE_V4SF_V4SF_PTEST:
33604 case INT_FTYPE_V2DI_V2DI_PTEST:
33605 case INT_FTYPE_V2DF_V2DF_PTEST:
33606 return ix86_expand_sse_ptest (d, exp, target);
33607 case FLOAT128_FTYPE_FLOAT128:
33608 case FLOAT_FTYPE_FLOAT:
33609 case INT_FTYPE_INT:
33610 case UINT64_FTYPE_INT:
33611 case UINT16_FTYPE_UINT16:
33612 case INT64_FTYPE_INT64:
33613 case INT64_FTYPE_V4SF:
33614 case INT64_FTYPE_V2DF:
33615 case INT_FTYPE_V16QI:
33616 case INT_FTYPE_V8QI:
33617 case INT_FTYPE_V8SF:
33618 case INT_FTYPE_V4DF:
33619 case INT_FTYPE_V4SF:
33620 case INT_FTYPE_V2DF:
33621 case INT_FTYPE_V32QI:
33622 case V16QI_FTYPE_V16QI:
33623 case V8SI_FTYPE_V8SF:
33624 case V8SI_FTYPE_V4SI:
33625 case V8HI_FTYPE_V8HI:
33626 case V8HI_FTYPE_V16QI:
33627 case V8QI_FTYPE_V8QI:
33628 case V8SF_FTYPE_V8SF:
33629 case V8SF_FTYPE_V8SI:
33630 case V8SF_FTYPE_V4SF:
33631 case V8SF_FTYPE_V8HI:
33632 case V4SI_FTYPE_V4SI:
33633 case V4SI_FTYPE_V16QI:
33634 case V4SI_FTYPE_V4SF:
33635 case V4SI_FTYPE_V8SI:
33636 case V4SI_FTYPE_V8HI:
33637 case V4SI_FTYPE_V4DF:
33638 case V4SI_FTYPE_V2DF:
33639 case V4HI_FTYPE_V4HI:
33640 case V4DF_FTYPE_V4DF:
33641 case V4DF_FTYPE_V4SI:
33642 case V4DF_FTYPE_V4SF:
33643 case V4DF_FTYPE_V2DF:
33644 case V4SF_FTYPE_V4SF:
33645 case V4SF_FTYPE_V4SI:
33646 case V4SF_FTYPE_V8SF:
33647 case V4SF_FTYPE_V4DF:
33648 case V4SF_FTYPE_V8HI:
33649 case V4SF_FTYPE_V2DF:
33650 case V2DI_FTYPE_V2DI:
33651 case V2DI_FTYPE_V16QI:
33652 case V2DI_FTYPE_V8HI:
33653 case V2DI_FTYPE_V4SI:
33654 case V2DF_FTYPE_V2DF:
33655 case V2DF_FTYPE_V4SI:
33656 case V2DF_FTYPE_V4DF:
33657 case V2DF_FTYPE_V4SF:
33658 case V2DF_FTYPE_V2SI:
33659 case V2SI_FTYPE_V2SI:
33660 case V2SI_FTYPE_V4SF:
33661 case V2SI_FTYPE_V2SF:
33662 case V2SI_FTYPE_V2DF:
33663 case V2SF_FTYPE_V2SF:
33664 case V2SF_FTYPE_V2SI:
33665 case V32QI_FTYPE_V32QI:
33666 case V32QI_FTYPE_V16QI:
33667 case V16HI_FTYPE_V16HI:
33668 case V16HI_FTYPE_V8HI:
33669 case V8SI_FTYPE_V8SI:
33670 case V16HI_FTYPE_V16QI:
33671 case V8SI_FTYPE_V16QI:
33672 case V4DI_FTYPE_V16QI:
33673 case V8SI_FTYPE_V8HI:
33674 case V4DI_FTYPE_V8HI:
33675 case V4DI_FTYPE_V4SI:
33676 case V4DI_FTYPE_V2DI:
33677 case HI_FTYPE_HI:
33678 case UINT_FTYPE_V2DF:
33679 case UINT_FTYPE_V4SF:
33680 case UINT64_FTYPE_V2DF:
33681 case UINT64_FTYPE_V4SF:
33682 case V16QI_FTYPE_V8DI:
33683 case V16HI_FTYPE_V16SI:
33684 case V16SI_FTYPE_HI:
33685 case V16SI_FTYPE_V16SI:
33686 case V16SI_FTYPE_INT:
33687 case V16SF_FTYPE_FLOAT:
33688 case V16SF_FTYPE_V4SF:
33689 case V16SF_FTYPE_V16SF:
33690 case V8HI_FTYPE_V8DI:
33691 case V8UHI_FTYPE_V8UHI:
33692 case V8SI_FTYPE_V8DI:
33693 case V8USI_FTYPE_V8USI:
33694 case V8SF_FTYPE_V8DF:
33695 case V8DI_FTYPE_QI:
33696 case V8DI_FTYPE_INT64:
33697 case V8DI_FTYPE_V4DI:
33698 case V8DI_FTYPE_V8DI:
33699 case V8DF_FTYPE_DOUBLE:
33700 case V8DF_FTYPE_V4DF:
33701 case V8DF_FTYPE_V8DF:
33702 case V8DF_FTYPE_V8SI:
33703 nargs = 1;
33704 break;
33705 case V4SF_FTYPE_V4SF_VEC_MERGE:
33706 case V2DF_FTYPE_V2DF_VEC_MERGE:
33707 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33708 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33709 case V16QI_FTYPE_V16QI_V16QI:
33710 case V16QI_FTYPE_V8HI_V8HI:
33711 case V16SI_FTYPE_V16SI_V16SI:
33712 case V16SF_FTYPE_V16SF_V16SF:
33713 case V16SF_FTYPE_V16SF_V16SI:
33714 case V8QI_FTYPE_V8QI_V8QI:
33715 case V8QI_FTYPE_V4HI_V4HI:
33716 case V8HI_FTYPE_V8HI_V8HI:
33717 case V8HI_FTYPE_V16QI_V16QI:
33718 case V8HI_FTYPE_V4SI_V4SI:
33719 case V8SF_FTYPE_V8SF_V8SF:
33720 case V8SF_FTYPE_V8SF_V8SI:
33721 case V8DI_FTYPE_V8DI_V8DI:
33722 case V8DF_FTYPE_V8DF_V8DF:
33723 case V8DF_FTYPE_V8DF_V8DI:
33724 case V4SI_FTYPE_V4SI_V4SI:
33725 case V4SI_FTYPE_V8HI_V8HI:
33726 case V4SI_FTYPE_V4SF_V4SF:
33727 case V4SI_FTYPE_V2DF_V2DF:
33728 case V4HI_FTYPE_V4HI_V4HI:
33729 case V4HI_FTYPE_V8QI_V8QI:
33730 case V4HI_FTYPE_V2SI_V2SI:
33731 case V4DF_FTYPE_V4DF_V4DF:
33732 case V4DF_FTYPE_V4DF_V4DI:
33733 case V4SF_FTYPE_V4SF_V4SF:
33734 case V4SF_FTYPE_V4SF_V4SI:
33735 case V4SF_FTYPE_V4SF_V2SI:
33736 case V4SF_FTYPE_V4SF_V2DF:
33737 case V4SF_FTYPE_V4SF_UINT:
33738 case V4SF_FTYPE_V4SF_UINT64:
33739 case V4SF_FTYPE_V4SF_DI:
33740 case V4SF_FTYPE_V4SF_SI:
33741 case V2DI_FTYPE_V2DI_V2DI:
33742 case V2DI_FTYPE_V16QI_V16QI:
33743 case V2DI_FTYPE_V4SI_V4SI:
33744 case V2UDI_FTYPE_V4USI_V4USI:
33745 case V2DI_FTYPE_V2DI_V16QI:
33746 case V2DI_FTYPE_V2DF_V2DF:
33747 case V2SI_FTYPE_V2SI_V2SI:
33748 case V2SI_FTYPE_V4HI_V4HI:
33749 case V2SI_FTYPE_V2SF_V2SF:
33750 case V2DF_FTYPE_V2DF_V2DF:
33751 case V2DF_FTYPE_V2DF_V4SF:
33752 case V2DF_FTYPE_V2DF_V2DI:
33753 case V2DF_FTYPE_V2DF_DI:
33754 case V2DF_FTYPE_V2DF_SI:
33755 case V2DF_FTYPE_V2DF_UINT:
33756 case V2DF_FTYPE_V2DF_UINT64:
33757 case V2SF_FTYPE_V2SF_V2SF:
33758 case V1DI_FTYPE_V1DI_V1DI:
33759 case V1DI_FTYPE_V8QI_V8QI:
33760 case V1DI_FTYPE_V2SI_V2SI:
33761 case V32QI_FTYPE_V16HI_V16HI:
33762 case V16HI_FTYPE_V8SI_V8SI:
33763 case V32QI_FTYPE_V32QI_V32QI:
33764 case V16HI_FTYPE_V32QI_V32QI:
33765 case V16HI_FTYPE_V16HI_V16HI:
33766 case V8SI_FTYPE_V4DF_V4DF:
33767 case V8SI_FTYPE_V8SI_V8SI:
33768 case V8SI_FTYPE_V16HI_V16HI:
33769 case V4DI_FTYPE_V4DI_V4DI:
33770 case V4DI_FTYPE_V8SI_V8SI:
33771 case V4UDI_FTYPE_V8USI_V8USI:
33772 case QI_FTYPE_V8DI_V8DI:
33773 case HI_FTYPE_V16SI_V16SI:
33774 if (comparison == UNKNOWN)
33775 return ix86_expand_binop_builtin (icode, exp, target);
33776 nargs = 2;
33777 break;
33778 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33779 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33780 gcc_assert (comparison != UNKNOWN);
33781 nargs = 2;
33782 swap = true;
33783 break;
33784 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33785 case V16HI_FTYPE_V16HI_SI_COUNT:
33786 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33787 case V8SI_FTYPE_V8SI_SI_COUNT:
33788 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33789 case V4DI_FTYPE_V4DI_INT_COUNT:
33790 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33791 case V8HI_FTYPE_V8HI_SI_COUNT:
33792 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33793 case V4SI_FTYPE_V4SI_SI_COUNT:
33794 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33795 case V4HI_FTYPE_V4HI_SI_COUNT:
33796 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33797 case V2DI_FTYPE_V2DI_SI_COUNT:
33798 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33799 case V2SI_FTYPE_V2SI_SI_COUNT:
33800 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33801 case V1DI_FTYPE_V1DI_SI_COUNT:
33802 nargs = 2;
33803 last_arg_count = true;
33804 break;
33805 case UINT64_FTYPE_UINT64_UINT64:
33806 case UINT_FTYPE_UINT_UINT:
33807 case UINT_FTYPE_UINT_USHORT:
33808 case UINT_FTYPE_UINT_UCHAR:
33809 case UINT16_FTYPE_UINT16_INT:
33810 case UINT8_FTYPE_UINT8_INT:
33811 case HI_FTYPE_HI_HI:
33812 case V16SI_FTYPE_V8DF_V8DF:
33813 nargs = 2;
33814 break;
33815 case V2DI_FTYPE_V2DI_INT_CONVERT:
33816 nargs = 2;
33817 rmode = V1TImode;
33818 nargs_constant = 1;
33819 break;
33820 case V4DI_FTYPE_V4DI_INT_CONVERT:
33821 nargs = 2;
33822 rmode = V2TImode;
33823 nargs_constant = 1;
33824 break;
33825 case V8HI_FTYPE_V8HI_INT:
33826 case V8HI_FTYPE_V8SF_INT:
33827 case V16HI_FTYPE_V16SF_INT:
33828 case V8HI_FTYPE_V4SF_INT:
33829 case V8SF_FTYPE_V8SF_INT:
33830 case V4SF_FTYPE_V16SF_INT:
33831 case V16SF_FTYPE_V16SF_INT:
33832 case V4SI_FTYPE_V4SI_INT:
33833 case V4SI_FTYPE_V8SI_INT:
33834 case V4HI_FTYPE_V4HI_INT:
33835 case V4DF_FTYPE_V4DF_INT:
33836 case V4DF_FTYPE_V8DF_INT:
33837 case V4SF_FTYPE_V4SF_INT:
33838 case V4SF_FTYPE_V8SF_INT:
33839 case V2DI_FTYPE_V2DI_INT:
33840 case V2DF_FTYPE_V2DF_INT:
33841 case V2DF_FTYPE_V4DF_INT:
33842 case V16HI_FTYPE_V16HI_INT:
33843 case V8SI_FTYPE_V8SI_INT:
33844 case V16SI_FTYPE_V16SI_INT:
33845 case V4SI_FTYPE_V16SI_INT:
33846 case V4DI_FTYPE_V4DI_INT:
33847 case V2DI_FTYPE_V4DI_INT:
33848 case V4DI_FTYPE_V8DI_INT:
33849 case HI_FTYPE_HI_INT:
33850 nargs = 2;
33851 nargs_constant = 1;
33852 break;
33853 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33854 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33855 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33856 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33857 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33858 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33859 case HI_FTYPE_V16SI_V16SI_HI:
33860 case QI_FTYPE_V8DI_V8DI_QI:
33861 case V16HI_FTYPE_V16SI_V16HI_HI:
33862 case V16QI_FTYPE_V16SI_V16QI_HI:
33863 case V16QI_FTYPE_V8DI_V16QI_QI:
33864 case V16SF_FTYPE_V16SF_V16SF_HI:
33865 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33866 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33867 case V16SF_FTYPE_V16SI_V16SF_HI:
33868 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33869 case V16SF_FTYPE_V4SF_V16SF_HI:
33870 case V16SI_FTYPE_SI_V16SI_HI:
33871 case V16SI_FTYPE_V16HI_V16SI_HI:
33872 case V16SI_FTYPE_V16QI_V16SI_HI:
33873 case V16SI_FTYPE_V16SF_V16SI_HI:
33874 case V16SI_FTYPE_V16SI_V16SI_HI:
33875 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33876 case V16SI_FTYPE_V4SI_V16SI_HI:
33877 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33878 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33879 case V8DF_FTYPE_V2DF_V8DF_QI:
33880 case V8DF_FTYPE_V4DF_V8DF_QI:
33881 case V8DF_FTYPE_V8DF_V8DF_QI:
33882 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33883 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33884 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33885 case V8DF_FTYPE_V8SF_V8DF_QI:
33886 case V8DF_FTYPE_V8SI_V8DF_QI:
33887 case V8DI_FTYPE_DI_V8DI_QI:
33888 case V8DI_FTYPE_V16QI_V8DI_QI:
33889 case V8DI_FTYPE_V2DI_V8DI_QI:
33890 case V8DI_FTYPE_V4DI_V8DI_QI:
33891 case V8DI_FTYPE_V8DI_V8DI_QI:
33892 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33893 case V8DI_FTYPE_V8HI_V8DI_QI:
33894 case V8DI_FTYPE_V8SI_V8DI_QI:
33895 case V8HI_FTYPE_V8DI_V8HI_QI:
33896 case V8SF_FTYPE_V8DF_V8SF_QI:
33897 case V8SI_FTYPE_V8DF_V8SI_QI:
33898 case V8SI_FTYPE_V8DI_V8SI_QI:
33899 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33900 nargs = 3;
33901 break;
33902 case V32QI_FTYPE_V32QI_V32QI_INT:
33903 case V16HI_FTYPE_V16HI_V16HI_INT:
33904 case V16QI_FTYPE_V16QI_V16QI_INT:
33905 case V4DI_FTYPE_V4DI_V4DI_INT:
33906 case V8HI_FTYPE_V8HI_V8HI_INT:
33907 case V8SI_FTYPE_V8SI_V8SI_INT:
33908 case V8SI_FTYPE_V8SI_V4SI_INT:
33909 case V8SF_FTYPE_V8SF_V8SF_INT:
33910 case V8SF_FTYPE_V8SF_V4SF_INT:
33911 case V4SI_FTYPE_V4SI_V4SI_INT:
33912 case V4DF_FTYPE_V4DF_V4DF_INT:
33913 case V16SF_FTYPE_V16SF_V16SF_INT:
33914 case V16SF_FTYPE_V16SF_V4SF_INT:
33915 case V16SI_FTYPE_V16SI_V4SI_INT:
33916 case V4DF_FTYPE_V4DF_V2DF_INT:
33917 case V4SF_FTYPE_V4SF_V4SF_INT:
33918 case V2DI_FTYPE_V2DI_V2DI_INT:
33919 case V4DI_FTYPE_V4DI_V2DI_INT:
33920 case V2DF_FTYPE_V2DF_V2DF_INT:
33921 case QI_FTYPE_V8DI_V8DI_INT:
33922 case QI_FTYPE_V8DF_V8DF_INT:
33923 case QI_FTYPE_V2DF_V2DF_INT:
33924 case QI_FTYPE_V4SF_V4SF_INT:
33925 case HI_FTYPE_V16SI_V16SI_INT:
33926 case HI_FTYPE_V16SF_V16SF_INT:
33927 nargs = 3;
33928 nargs_constant = 1;
33929 break;
33930 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33931 nargs = 3;
33932 rmode = V4DImode;
33933 nargs_constant = 1;
33934 break;
33935 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33936 nargs = 3;
33937 rmode = V2DImode;
33938 nargs_constant = 1;
33939 break;
33940 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33941 nargs = 3;
33942 rmode = DImode;
33943 nargs_constant = 1;
33944 break;
33945 case V2DI_FTYPE_V2DI_UINT_UINT:
33946 nargs = 3;
33947 nargs_constant = 2;
33948 break;
33949 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33950 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33951 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33952 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33953 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33954 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33955 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33956 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33957 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33958 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33959 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33960 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33961 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33962 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33963 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33964 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33965 nargs = 4;
33966 break;
33967 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33968 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33969 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33970 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33971 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33972 nargs = 4;
33973 nargs_constant = 1;
33974 break;
33975 case QI_FTYPE_V2DF_V2DF_INT_QI:
33976 case QI_FTYPE_V4SF_V4SF_INT_QI:
33977 nargs = 4;
33978 mask_pos = 1;
33979 nargs_constant = 1;
33980 break;
33981 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33982 nargs = 4;
33983 nargs_constant = 2;
33984 break;
33985 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33986 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33987 nargs = 4;
33988 break;
33989 case QI_FTYPE_V8DI_V8DI_INT_QI:
33990 case HI_FTYPE_V16SI_V16SI_INT_HI:
33991 case QI_FTYPE_V8DF_V8DF_INT_QI:
33992 case HI_FTYPE_V16SF_V16SF_INT_HI:
33993 mask_pos = 1;
33994 nargs = 4;
33995 nargs_constant = 1;
33996 break;
33997 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33998 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33999 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34000 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34001 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34002 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34003 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34004 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34005 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34006 nargs = 4;
34007 mask_pos = 2;
34008 nargs_constant = 1;
34009 break;
34010 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34011 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34012 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34013 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34014 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34015 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34016 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34017 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34018 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34019 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34020 nargs = 5;
34021 mask_pos = 2;
34022 nargs_constant = 1;
34023 break;
34024 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34025 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34026 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34027 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34028 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34029 nargs = 5;
34030 mask_pos = 1;
34031 nargs_constant = 1;
34032 break;
34034 default:
34035 gcc_unreachable ();
34038 gcc_assert (nargs <= ARRAY_SIZE (args));
34040 if (comparison != UNKNOWN)
34042 gcc_assert (nargs == 2);
34043 return ix86_expand_sse_compare (d, exp, target, swap);
34046 if (rmode == VOIDmode || rmode == tmode)
34048 if (optimize
34049 || target == 0
34050 || GET_MODE (target) != tmode
34051 || !insn_p->operand[0].predicate (target, tmode))
34052 target = gen_reg_rtx (tmode);
34053 real_target = target;
34055 else
34057 real_target = gen_reg_rtx (tmode);
34058 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34061 for (i = 0; i < nargs; i++)
34063 tree arg = CALL_EXPR_ARG (exp, i);
34064 rtx op = expand_normal (arg);
34065 enum machine_mode mode = insn_p->operand[i + 1].mode;
34066 bool match = insn_p->operand[i + 1].predicate (op, mode);
34068 if (last_arg_count && (i + 1) == nargs)
34070 /* SIMD shift insns take either an 8-bit immediate or
34071 register as count. But builtin functions take int as
34072 count. If the count doesn't match, we put it in a register. */
34073 if (!match)
34075 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34076 if (!insn_p->operand[i + 1].predicate (op, mode))
34077 op = copy_to_reg (op);
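/* Illustration (assuming the usual <emmintrin.h> wrappers; example not taken
from this file): for a call such as
__m128i r = _mm_slli_epi32 (v, n); with a non-constant int n
the builtin's count argument is a plain int and may not satisfy the insn's
count predicate, so the code above converts it to SImode and, if necessary,
copies it into a register so the shift pattern still matches. */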
34080 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34081 (!mask_pos && (nargs - i) <= nargs_constant))
34083 if (!match)
34084 switch (icode)
34086 case CODE_FOR_avx2_inserti128:
34087 case CODE_FOR_avx2_extracti128:
34088 error ("the last argument must be an 1-bit immediate");
34089 return const0_rtx;
34091 case CODE_FOR_avx512f_cmpv8di3_mask:
34092 case CODE_FOR_avx512f_cmpv16si3_mask:
34093 case CODE_FOR_avx512f_ucmpv8di3_mask:
34094 case CODE_FOR_avx512f_ucmpv16si3_mask:
34095 error ("the last argument must be a 3-bit immediate");
34096 return const0_rtx;
34098 case CODE_FOR_sse4_1_roundsd:
34099 case CODE_FOR_sse4_1_roundss:
34101 case CODE_FOR_sse4_1_roundpd:
34102 case CODE_FOR_sse4_1_roundps:
34103 case CODE_FOR_avx_roundpd256:
34104 case CODE_FOR_avx_roundps256:
34106 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34107 case CODE_FOR_sse4_1_roundps_sfix:
34108 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34109 case CODE_FOR_avx_roundps_sfix256:
34111 case CODE_FOR_sse4_1_blendps:
34112 case CODE_FOR_avx_blendpd256:
34113 case CODE_FOR_avx_vpermilv4df:
34114 case CODE_FOR_avx512f_getmantv8df_mask:
34115 case CODE_FOR_avx512f_getmantv16sf_mask:
34116 error ("the last argument must be a 4-bit immediate");
34117 return const0_rtx;
34119 case CODE_FOR_sha1rnds4:
34120 case CODE_FOR_sse4_1_blendpd:
34121 case CODE_FOR_avx_vpermilv2df:
34122 case CODE_FOR_xop_vpermil2v2df3:
34123 case CODE_FOR_xop_vpermil2v4sf3:
34124 case CODE_FOR_xop_vpermil2v4df3:
34125 case CODE_FOR_xop_vpermil2v8sf3:
34126 case CODE_FOR_avx512f_vinsertf32x4_mask:
34127 case CODE_FOR_avx512f_vinserti32x4_mask:
34128 case CODE_FOR_avx512f_vextractf32x4_mask:
34129 case CODE_FOR_avx512f_vextracti32x4_mask:
34130 error ("the last argument must be a 2-bit immediate");
34131 return const0_rtx;
34133 case CODE_FOR_avx_vextractf128v4df:
34134 case CODE_FOR_avx_vextractf128v8sf:
34135 case CODE_FOR_avx_vextractf128v8si:
34136 case CODE_FOR_avx_vinsertf128v4df:
34137 case CODE_FOR_avx_vinsertf128v8sf:
34138 case CODE_FOR_avx_vinsertf128v8si:
34139 case CODE_FOR_avx512f_vinsertf64x4_mask:
34140 case CODE_FOR_avx512f_vinserti64x4_mask:
34141 case CODE_FOR_avx512f_vextractf64x4_mask:
34142 case CODE_FOR_avx512f_vextracti64x4_mask:
34143 error ("the last argument must be a 1-bit immediate");
34144 return const0_rtx;
34146 case CODE_FOR_avx_vmcmpv2df3:
34147 case CODE_FOR_avx_vmcmpv4sf3:
34148 case CODE_FOR_avx_cmpv2df3:
34149 case CODE_FOR_avx_cmpv4sf3:
34150 case CODE_FOR_avx_cmpv4df3:
34151 case CODE_FOR_avx_cmpv8sf3:
34152 case CODE_FOR_avx512f_cmpv8df3_mask:
34153 case CODE_FOR_avx512f_cmpv16sf3_mask:
34154 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34155 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34156 error ("the last argument must be a 5-bit immediate");
34157 return const0_rtx;
34159 default:
34160 switch (nargs_constant)
34162 case 2:
34163 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34164 (!mask_pos && (nargs - i) == nargs_constant))
34166 error ("the next to last argument must be an 8-bit immediate");
34167 break;
34169 case 1:
34170 error ("the last argument must be an 8-bit immediate");
34171 break;
34172 default:
34173 gcc_unreachable ();
34175 return const0_rtx;
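/* Worked example of the constant-position checks above (derived from the
cases in this switch, not additional source text): for
V8DF_FTYPE_V8DF_INT_V8DF_QI we have nargs == 4, mask_pos == 2 and
nargs_constant == 1, so (nargs - i - mask_pos) == nargs_constant holds for
i == 1, i.e. the INT operand two places before the end must be a CONST_INT.
For V2DF_FTYPE_V2DF_V2DF_V2DI_INT (mask_pos == 0, nargs_constant == 1) the
!mask_pos test (nargs - i) <= nargs_constant selects only i == 3, the last
argument. */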
34178 else
34180 if (VECTOR_MODE_P (mode))
34181 op = safe_vector_operand (op, mode);
34183 /* If we aren't optimizing, only allow one memory operand to
34184 be generated. */
34185 if (memory_operand (op, mode))
34186 num_memory++;
34188 op = fixup_modeless_constant (op, mode);
34190 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34192 if (optimize || !match || num_memory > 1)
34193 op = copy_to_mode_reg (mode, op);
34195 else
34197 op = copy_to_reg (op);
34198 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34202 args[i].op = op;
34203 args[i].mode = mode;
34206 switch (nargs)
34208 case 1:
34209 pat = GEN_FCN (icode) (real_target, args[0].op);
34210 break;
34211 case 2:
34212 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34213 break;
34214 case 3:
34215 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34216 args[2].op);
34217 break;
34218 case 4:
34219 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34220 args[2].op, args[3].op);
34221 break;
34222 case 5:
34223 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34224 args[2].op, args[3].op, args[4].op);
break;
34225 case 6:
34226 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34227 args[2].op, args[3].op, args[4].op,
34228 args[5].op);
34229 break;
34230 default:
34231 gcc_unreachable ();
34234 if (! pat)
34235 return 0;
34237 emit_insn (pat);
34238 return target;
34241 /* Transform a pattern of the following layout:
34242 (parallel [
34243 set (A B)
34244 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34246 into:
34247 (set (A B))
Or a pattern of the layout:
34250 (parallel [ A B
...
34252 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
... ])
34255 into:
34256 (parallel [ A B ... ]) */
34258 static rtx
34259 ix86_erase_embedded_rounding (rtx pat)
34261 if (GET_CODE (pat) == INSN)
34262 pat = PATTERN (pat);
34264 gcc_assert (GET_CODE (pat) == PARALLEL);
34266 if (XVECLEN (pat, 0) == 2)
34268 rtx p0 = XVECEXP (pat, 0, 0);
34269 rtx p1 = XVECEXP (pat, 0, 1);
34271 gcc_assert (GET_CODE (p0) == SET
34272 && GET_CODE (p1) == UNSPEC
34273 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34275 return p0;
34277 else
34279 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34280 int i = 0;
34281 int j = 0;
34283 for (; i < XVECLEN (pat, 0); ++i)
34285 rtx elem = XVECEXP (pat, 0, i);
34286 if (GET_CODE (elem) != UNSPEC
34287 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34288 res [j++] = elem;
34291 /* No more than 1 occurrence was removed. */
34292 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34294 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34298 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34299 with rounding. */
34300 static rtx
34301 ix86_expand_sse_comi_round (const struct builtin_description *d,
34302 tree exp, rtx target)
34304 rtx pat, set_dst;
34305 tree arg0 = CALL_EXPR_ARG (exp, 0);
34306 tree arg1 = CALL_EXPR_ARG (exp, 1);
34307 tree arg2 = CALL_EXPR_ARG (exp, 2);
34308 tree arg3 = CALL_EXPR_ARG (exp, 3);
34309 rtx op0 = expand_normal (arg0);
34310 rtx op1 = expand_normal (arg1);
34311 rtx op2 = expand_normal (arg2);
34312 rtx op3 = expand_normal (arg3);
34313 enum insn_code icode = d->icode;
34314 const struct insn_data_d *insn_p = &insn_data[icode];
34315 enum machine_mode mode0 = insn_p->operand[0].mode;
34316 enum machine_mode mode1 = insn_p->operand[1].mode;
34317 enum rtx_code comparison = UNEQ;
34318 bool need_ucomi = false;
34320 /* See avxintrin.h for values. */
34321 enum rtx_code comi_comparisons[32] =
34323 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34324 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34325 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34327 bool need_ucomi_values[32] =
34329 true, false, false, true, true, false, false, true,
34330 true, false, false, true, true, false, false, true,
34331 false, true, true, false, false, true, true, false,
34332 false, true, true, false, false, true, true, false
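/* Example of how the two tables are indexed (assuming the usual
<immintrin.h> predicate encoding, e.g. _CMP_UNORD_Q == 3 and
_CMP_LT_OS == 1): predicate 3 selects UNORDERED together with the quiet
(non-signalling) ucomi form, while predicate 1 selects GT and keeps the
signalling comi form. In general the quiet *_Q/_OQ/_UQ predicates set
need_ucomi and the signalling *_S/_OS/_US predicates do not. */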
34335 if (!CONST_INT_P (op2))
34337 error ("the third argument must be comparison constant");
34338 return const0_rtx;
34340 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34342 error ("incorect comparison mode");
34343 return const0_rtx;
34346 if (!insn_p->operand[2].predicate (op3, SImode))
34348 error ("incorrect rounding operand");
34349 return const0_rtx;
34352 comparison = comi_comparisons[INTVAL (op2)];
34353 need_ucomi = need_ucomi_values[INTVAL (op2)];
34355 if (VECTOR_MODE_P (mode0))
34356 op0 = safe_vector_operand (op0, mode0);
34357 if (VECTOR_MODE_P (mode1))
34358 op1 = safe_vector_operand (op1, mode1);
34360 target = gen_reg_rtx (SImode);
34361 emit_move_insn (target, const0_rtx);
34362 target = gen_rtx_SUBREG (QImode, target, 0);
34364 if ((optimize && !register_operand (op0, mode0))
34365 || !insn_p->operand[0].predicate (op0, mode0))
34366 op0 = copy_to_mode_reg (mode0, op0);
34367 if ((optimize && !register_operand (op1, mode1))
34368 || !insn_p->operand[1].predicate (op1, mode1))
34369 op1 = copy_to_mode_reg (mode1, op1);
34371 if (need_ucomi)
34372 icode = icode == CODE_FOR_sse_comi_round
34373 ? CODE_FOR_sse_ucomi_round
34374 : CODE_FOR_sse2_ucomi_round;
34376 pat = GEN_FCN (icode) (op0, op1, op3);
34377 if (! pat)
34378 return 0;
34380 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34381 if (INTVAL (op3) == NO_ROUND)
34383 pat = ix86_erase_embedded_rounding (pat);
34384 if (! pat)
34385 return 0;
34387 set_dst = SET_DEST (pat);
34389 else
34391 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34392 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34395 emit_insn (pat);
34396 emit_insn (gen_rtx_SET (VOIDmode,
34397 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34398 gen_rtx_fmt_ee (comparison, QImode,
34399 set_dst,
34400 const0_rtx)));
34402 return SUBREG_REG (target);
34405 static rtx
34406 ix86_expand_round_builtin (const struct builtin_description *d,
34407 tree exp, rtx target)
34409 rtx pat;
34410 unsigned int i, nargs;
34411 struct
34413 rtx op;
34414 enum machine_mode mode;
34415 } args[6];
34416 enum insn_code icode = d->icode;
34417 const struct insn_data_d *insn_p = &insn_data[icode];
34418 enum machine_mode tmode = insn_p->operand[0].mode;
34419 unsigned int nargs_constant = 0;
34420 unsigned int redundant_embed_rnd = 0;
34422 switch ((enum ix86_builtin_func_type) d->flag)
34424 case UINT64_FTYPE_V2DF_INT:
34425 case UINT64_FTYPE_V4SF_INT:
34426 case UINT_FTYPE_V2DF_INT:
34427 case UINT_FTYPE_V4SF_INT:
34428 case INT64_FTYPE_V2DF_INT:
34429 case INT64_FTYPE_V4SF_INT:
34430 case INT_FTYPE_V2DF_INT:
34431 case INT_FTYPE_V4SF_INT:
34432 nargs = 2;
34433 break;
34434 case V4SF_FTYPE_V4SF_UINT_INT:
34435 case V4SF_FTYPE_V4SF_UINT64_INT:
34436 case V2DF_FTYPE_V2DF_UINT64_INT:
34437 case V4SF_FTYPE_V4SF_INT_INT:
34438 case V4SF_FTYPE_V4SF_INT64_INT:
34439 case V2DF_FTYPE_V2DF_INT64_INT:
34440 case V4SF_FTYPE_V4SF_V4SF_INT:
34441 case V2DF_FTYPE_V2DF_V2DF_INT:
34442 case V4SF_FTYPE_V4SF_V2DF_INT:
34443 case V2DF_FTYPE_V2DF_V4SF_INT:
34444 nargs = 3;
34445 break;
34446 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34447 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34448 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34449 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34450 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34451 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34452 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34453 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34454 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34455 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34456 nargs = 4;
34457 break;
34458 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34459 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34460 nargs_constant = 2;
34461 nargs = 4;
34462 break;
34463 case INT_FTYPE_V4SF_V4SF_INT_INT:
34464 case INT_FTYPE_V2DF_V2DF_INT_INT:
34465 return ix86_expand_sse_comi_round (d, exp, target);
34466 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34467 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34468 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34469 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34470 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34471 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34472 nargs = 5;
34473 break;
34474 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34475 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34476 nargs_constant = 4;
34477 nargs = 5;
34478 break;
34479 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34480 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34481 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34482 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34483 nargs_constant = 3;
34484 nargs = 5;
34485 break;
34486 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34487 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34488 nargs = 6;
34489 nargs_constant = 4;
34490 break;
34491 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34492 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34493 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34494 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34495 nargs = 6;
34496 nargs_constant = 3;
34497 break;
34498 default:
34499 gcc_unreachable ();
34501 gcc_assert (nargs <= ARRAY_SIZE (args));
34503 if (optimize
34504 || target == 0
34505 || GET_MODE (target) != tmode
34506 || !insn_p->operand[0].predicate (target, tmode))
34507 target = gen_reg_rtx (tmode);
34509 for (i = 0; i < nargs; i++)
34511 tree arg = CALL_EXPR_ARG (exp, i);
34512 rtx op = expand_normal (arg);
34513 enum machine_mode mode = insn_p->operand[i + 1].mode;
34514 bool match = insn_p->operand[i + 1].predicate (op, mode);
34516 if (i == nargs - nargs_constant)
34518 if (!match)
34520 switch (icode)
34522 case CODE_FOR_avx512f_getmantv8df_mask_round:
34523 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34524 case CODE_FOR_avx512f_getmantv2df_round:
34525 case CODE_FOR_avx512f_getmantv4sf_round:
34526 error ("the immediate argument must be a 4-bit immediate");
34527 return const0_rtx;
34528 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34529 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34530 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34531 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34532 error ("the immediate argument must be a 5-bit immediate");
34533 return const0_rtx;
34534 default:
34535 error ("the immediate argument must be an 8-bit immediate");
34536 return const0_rtx;
34540 else if (i == nargs-1)
34542 if (!insn_p->operand[nargs].predicate (op, SImode))
34544 error ("incorrect rounding operand");
34545 return const0_rtx;
34548 /* If there is no rounding, use the normal version of the pattern. */
34549 if (INTVAL (op) == NO_ROUND)
34550 redundant_embed_rnd = 1;
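/* For instance (assuming _MM_FROUND_CUR_DIRECTION carries the NO_ROUND
value, as in the usual intrinsic headers), a call such as
_mm512_add_round_pd (a, b, _MM_FROUND_CUR_DIRECTION) requests no explicit
rounding, so the embedded-rounding unspec is stripped via
ix86_erase_embedded_rounding below and the builtin expands just like the
plain, non-rounding pattern. */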
34552 else
34554 if (VECTOR_MODE_P (mode))
34555 op = safe_vector_operand (op, mode);
34557 op = fixup_modeless_constant (op, mode);
34559 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34561 if (optimize || !match)
34562 op = copy_to_mode_reg (mode, op);
34564 else
34566 op = copy_to_reg (op);
34567 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34571 args[i].op = op;
34572 args[i].mode = mode;
34575 switch (nargs)
34577 case 1:
34578 pat = GEN_FCN (icode) (target, args[0].op);
34579 break;
34580 case 2:
34581 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34582 break;
34583 case 3:
34584 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34585 args[2].op);
34586 break;
34587 case 4:
34588 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34589 args[2].op, args[3].op);
34590 break;
34591 case 5:
34592 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34593 args[2].op, args[3].op, args[4].op);
break;
34594 case 6:
34595 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34596 args[2].op, args[3].op, args[4].op,
34597 args[5].op);
34598 break;
34599 default:
34600 gcc_unreachable ();
34603 if (!pat)
34604 return 0;
34606 if (redundant_embed_rnd)
34607 pat = ix86_erase_embedded_rounding (pat);
34609 emit_insn (pat);
34610 return target;
34613 /* Subroutine of ix86_expand_builtin to take care of special insns
34614 with variable number of operands. */
34616 static rtx
34617 ix86_expand_special_args_builtin (const struct builtin_description *d,
34618 tree exp, rtx target)
34620 tree arg;
34621 rtx pat, op;
34622 unsigned int i, nargs, arg_adjust, memory;
34623 bool aligned_mem = false;
34624 struct
34626 rtx op;
34627 enum machine_mode mode;
34628 } args[3];
34629 enum insn_code icode = d->icode;
34630 bool last_arg_constant = false;
34631 const struct insn_data_d *insn_p = &insn_data[icode];
34632 enum machine_mode tmode = insn_p->operand[0].mode;
34633 enum { load, store } klass;
34635 switch ((enum ix86_builtin_func_type) d->flag)
34637 case VOID_FTYPE_VOID:
34638 emit_insn (GEN_FCN (icode) (target));
34639 return 0;
34640 case VOID_FTYPE_UINT64:
34641 case VOID_FTYPE_UNSIGNED:
34642 nargs = 0;
34643 klass = store;
34644 memory = 0;
34645 break;
34647 case INT_FTYPE_VOID:
34648 case UINT64_FTYPE_VOID:
34649 case UNSIGNED_FTYPE_VOID:
34650 nargs = 0;
34651 klass = load;
34652 memory = 0;
34653 break;
34654 case UINT64_FTYPE_PUNSIGNED:
34655 case V2DI_FTYPE_PV2DI:
34656 case V4DI_FTYPE_PV4DI:
34657 case V32QI_FTYPE_PCCHAR:
34658 case V16QI_FTYPE_PCCHAR:
34659 case V8SF_FTYPE_PCV4SF:
34660 case V8SF_FTYPE_PCFLOAT:
34661 case V4SF_FTYPE_PCFLOAT:
34662 case V4DF_FTYPE_PCV2DF:
34663 case V4DF_FTYPE_PCDOUBLE:
34664 case V2DF_FTYPE_PCDOUBLE:
34665 case VOID_FTYPE_PVOID:
34666 case V16SI_FTYPE_PV4SI:
34667 case V16SF_FTYPE_PV4SF:
34668 case V8DI_FTYPE_PV4DI:
34669 case V8DI_FTYPE_PV8DI:
34670 case V8DF_FTYPE_PV4DF:
34671 nargs = 1;
34672 klass = load;
34673 memory = 0;
34674 switch (icode)
34676 case CODE_FOR_sse4_1_movntdqa:
34677 case CODE_FOR_avx2_movntdqa:
34678 case CODE_FOR_avx512f_movntdqa:
34679 aligned_mem = true;
34680 break;
34681 default:
34682 break;
34684 break;
34685 case VOID_FTYPE_PV2SF_V4SF:
34686 case VOID_FTYPE_PV8DI_V8DI:
34687 case VOID_FTYPE_PV4DI_V4DI:
34688 case VOID_FTYPE_PV2DI_V2DI:
34689 case VOID_FTYPE_PCHAR_V32QI:
34690 case VOID_FTYPE_PCHAR_V16QI:
34691 case VOID_FTYPE_PFLOAT_V16SF:
34692 case VOID_FTYPE_PFLOAT_V8SF:
34693 case VOID_FTYPE_PFLOAT_V4SF:
34694 case VOID_FTYPE_PDOUBLE_V8DF:
34695 case VOID_FTYPE_PDOUBLE_V4DF:
34696 case VOID_FTYPE_PDOUBLE_V2DF:
34697 case VOID_FTYPE_PLONGLONG_LONGLONG:
34698 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34699 case VOID_FTYPE_PINT_INT:
34700 nargs = 1;
34701 klass = store;
34702 /* Reserve memory operand for target. */
34703 memory = ARRAY_SIZE (args);
34704 switch (icode)
34706 /* These builtins and instructions require the memory
34707 to be properly aligned. */
34708 case CODE_FOR_avx_movntv4di:
34709 case CODE_FOR_sse2_movntv2di:
34710 case CODE_FOR_avx_movntv8sf:
34711 case CODE_FOR_sse_movntv4sf:
34712 case CODE_FOR_sse4a_vmmovntv4sf:
34713 case CODE_FOR_avx_movntv4df:
34714 case CODE_FOR_sse2_movntv2df:
34715 case CODE_FOR_sse4a_vmmovntv2df:
34716 case CODE_FOR_sse2_movntidi:
34717 case CODE_FOR_sse_movntq:
34718 case CODE_FOR_sse2_movntisi:
34719 case CODE_FOR_avx512f_movntv16sf:
34720 case CODE_FOR_avx512f_movntv8df:
34721 case CODE_FOR_avx512f_movntv8di:
34722 aligned_mem = true;
34723 break;
34724 default:
34725 break;
34727 break;
34728 case V4SF_FTYPE_V4SF_PCV2SF:
34729 case V2DF_FTYPE_V2DF_PCDOUBLE:
34730 nargs = 2;
34731 klass = load;
34732 memory = 1;
34733 break;
34734 case V8SF_FTYPE_PCV8SF_V8SI:
34735 case V4DF_FTYPE_PCV4DF_V4DI:
34736 case V4SF_FTYPE_PCV4SF_V4SI:
34737 case V2DF_FTYPE_PCV2DF_V2DI:
34738 case V8SI_FTYPE_PCV8SI_V8SI:
34739 case V4DI_FTYPE_PCV4DI_V4DI:
34740 case V4SI_FTYPE_PCV4SI_V4SI:
34741 case V2DI_FTYPE_PCV2DI_V2DI:
34742 nargs = 2;
34743 klass = load;
34744 memory = 0;
34745 break;
34746 case VOID_FTYPE_PV8DF_V8DF_QI:
34747 case VOID_FTYPE_PV16SF_V16SF_HI:
34748 case VOID_FTYPE_PV8DI_V8DI_QI:
34749 case VOID_FTYPE_PV16SI_V16SI_HI:
34750 switch (icode)
34752 /* These builtins and instructions require the memory
34753 to be properly aligned. */
34754 case CODE_FOR_avx512f_storev16sf_mask:
34755 case CODE_FOR_avx512f_storev16si_mask:
34756 case CODE_FOR_avx512f_storev8df_mask:
34757 case CODE_FOR_avx512f_storev8di_mask:
34758 aligned_mem = true;
34759 break;
34760 default:
34761 break;
34763 /* FALLTHRU */
34764 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34765 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34766 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34767 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34768 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34769 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34770 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34771 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34772 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34773 case VOID_FTYPE_PFLOAT_V4SF_QI:
34774 case VOID_FTYPE_PV8SI_V8DI_QI:
34775 case VOID_FTYPE_PV8HI_V8DI_QI:
34776 case VOID_FTYPE_PV16HI_V16SI_HI:
34777 case VOID_FTYPE_PV16QI_V8DI_QI:
34778 case VOID_FTYPE_PV16QI_V16SI_HI:
34779 nargs = 2;
34780 klass = store;
34781 /* Reserve memory operand for target. */
34782 memory = ARRAY_SIZE (args);
34783 break;
34784 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34785 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34786 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34787 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34788 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34789 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34790 nargs = 3;
34791 klass = load;
34792 memory = 0;
34793 switch (icode)
34795 /* These builtins and instructions require the memory
34796 to be properly aligned. */
34797 case CODE_FOR_avx512f_loadv16sf_mask:
34798 case CODE_FOR_avx512f_loadv16si_mask:
34799 case CODE_FOR_avx512f_loadv8df_mask:
34800 case CODE_FOR_avx512f_loadv8di_mask:
34801 aligned_mem = true;
34802 break;
34803 default:
34804 break;
34806 break;
34807 case VOID_FTYPE_UINT_UINT_UINT:
34808 case VOID_FTYPE_UINT64_UINT_UINT:
34809 case UCHAR_FTYPE_UINT_UINT_UINT:
34810 case UCHAR_FTYPE_UINT64_UINT_UINT:
34811 nargs = 3;
34812 klass = load;
34813 memory = ARRAY_SIZE (args);
34814 last_arg_constant = true;
34815 break;
34816 default:
34817 gcc_unreachable ();
34820 gcc_assert (nargs <= ARRAY_SIZE (args));
34822 if (klass == store)
34824 arg = CALL_EXPR_ARG (exp, 0);
34825 op = expand_normal (arg);
34826 gcc_assert (target == 0);
34827 if (memory)
34829 op = ix86_zero_extend_to_Pmode (op);
34830 target = gen_rtx_MEM (tmode, op);
34831 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34832 on it. Try to improve it using get_pointer_alignment,
34833 and if the special builtin is one that requires strict
34834 mode alignment, also from its GET_MODE_ALIGNMENT.
34835 Failure to do so could lead to ix86_legitimate_combined_insn
34836 rejecting all changes to such insns. */
34837 unsigned int align = get_pointer_alignment (arg);
34838 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34839 align = GET_MODE_ALIGNMENT (tmode);
34840 if (MEM_ALIGN (target) < align)
34841 set_mem_align (target, align);
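/* Example (intrinsic name assumed from the standard AVX headers): for
_mm256_stream_si256, which maps to CODE_FOR_avx_movntv4di above, the
non-temporal store requires a 32-byte aligned destination. The pointer
argument alone usually only proves BITS_PER_UNIT alignment, so the MEM is
raised to GET_MODE_ALIGNMENT (V4DImode) here; otherwise
ix86_legitimate_combined_insn could later reject every change to the insn. */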
34843 else
34844 target = force_reg (tmode, op);
34845 arg_adjust = 1;
34847 else
34849 arg_adjust = 0;
34850 if (optimize
34851 || target == 0
34852 || !register_operand (target, tmode)
34853 || GET_MODE (target) != tmode)
34854 target = gen_reg_rtx (tmode);
34857 for (i = 0; i < nargs; i++)
34859 enum machine_mode mode = insn_p->operand[i + 1].mode;
34860 bool match;
34862 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34863 op = expand_normal (arg);
34864 match = insn_p->operand[i + 1].predicate (op, mode);
34866 if (last_arg_constant && (i + 1) == nargs)
34868 if (!match)
34870 if (icode == CODE_FOR_lwp_lwpvalsi3
34871 || icode == CODE_FOR_lwp_lwpinssi3
34872 || icode == CODE_FOR_lwp_lwpvaldi3
34873 || icode == CODE_FOR_lwp_lwpinsdi3)
34874 error ("the last argument must be a 32-bit immediate");
34875 else
34876 error ("the last argument must be an 8-bit immediate");
34877 return const0_rtx;
34880 else
34882 if (i == memory)
34884 /* This must be the memory operand. */
34885 op = ix86_zero_extend_to_Pmode (op);
34886 op = gen_rtx_MEM (mode, op);
34887 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34888 on it. Try to improve it using get_pointer_alignment,
34889 and if the special builtin is one that requires strict
34890 mode alignment, also from its GET_MODE_ALIGNMENT.
34891 Failure to do so could lead to ix86_legitimate_combined_insn
34892 rejecting all changes to such insns. */
34893 unsigned int align = get_pointer_alignment (arg);
34894 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34895 align = GET_MODE_ALIGNMENT (mode);
34896 if (MEM_ALIGN (op) < align)
34897 set_mem_align (op, align);
34899 else
34901 /* This must be a register. */
34902 if (VECTOR_MODE_P (mode))
34903 op = safe_vector_operand (op, mode);
34905 op = fixup_modeless_constant (op, mode);
34907 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34908 op = copy_to_mode_reg (mode, op);
34909 else
34911 op = copy_to_reg (op);
34912 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34917 args[i].op = op;
34918 args[i].mode = mode;
34921 switch (nargs)
34923 case 0:
34924 pat = GEN_FCN (icode) (target);
34925 break;
34926 case 1:
34927 pat = GEN_FCN (icode) (target, args[0].op);
34928 break;
34929 case 2:
34930 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34931 break;
34932 case 3:
34933 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34934 break;
34935 default:
34936 gcc_unreachable ();
34939 if (! pat)
34940 return 0;
34941 emit_insn (pat);
34942 return klass == store ? 0 : target;
34945 /* Return the integer constant in ARG. Constrain it to be in the range
34946 of the subparts of VEC_TYPE; issue an error if not. */
34948 static int
34949 get_element_number (tree vec_type, tree arg)
34951 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34953 if (!tree_fits_uhwi_p (arg)
34954 || (elt = tree_to_uhwi (arg), elt > max))
34956 error ("selector must be an integer constant in the range 0..%wi", max);
34957 return 0;
34960 return elt;
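/* For example, with a V4SF first argument TYPE_VECTOR_SUBPARTS is 4, so the
selector of a vec_ext/vec_set builtin must be a constant in the range 0..3;
a value such as 5 reports the error above and element 0 is used instead. */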
34963 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34964 ix86_expand_vector_init. We DO have language-level syntax for this, in
34965 the form of (type){ init-list }. Except that since we can't place emms
34966 instructions from inside the compiler, we can't allow the use of MMX
34967 registers unless the user explicitly asks for it. So we do *not* define
34968 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34969 we have builtins invoked by mmintrin.h that give us license to emit
34970 these sorts of instructions. */
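/* For example (per the usual mmintrin.h implementation), _mm_set_pi32 is
written in terms of __builtin_ia32_vec_init_v2si, which routes through
ix86_expand_vec_init_builtin below rather than through the
(type){ init-list } syntax mentioned above. */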
34972 static rtx
34973 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34975 enum machine_mode tmode = TYPE_MODE (type);
34976 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34977 int i, n_elt = GET_MODE_NUNITS (tmode);
34978 rtvec v = rtvec_alloc (n_elt);
34980 gcc_assert (VECTOR_MODE_P (tmode));
34981 gcc_assert (call_expr_nargs (exp) == n_elt);
34983 for (i = 0; i < n_elt; ++i)
34985 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34986 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34989 if (!target || !register_operand (target, tmode))
34990 target = gen_reg_rtx (tmode);
34992 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34993 return target;
34996 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34997 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34998 had a language-level syntax for referencing vector elements. */
35000 static rtx
35001 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35003 enum machine_mode tmode, mode0;
35004 tree arg0, arg1;
35005 int elt;
35006 rtx op0;
35008 arg0 = CALL_EXPR_ARG (exp, 0);
35009 arg1 = CALL_EXPR_ARG (exp, 1);
35011 op0 = expand_normal (arg0);
35012 elt = get_element_number (TREE_TYPE (arg0), arg1);
35014 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35015 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35016 gcc_assert (VECTOR_MODE_P (mode0));
35018 op0 = force_reg (mode0, op0);
35020 if (optimize || !target || !register_operand (target, tmode))
35021 target = gen_reg_rtx (tmode);
35023 ix86_expand_vector_extract (true, target, op0, elt);
35025 return target;
35028 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35029 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35030 a language-level syntax for referencing vector elements. */
35032 static rtx
35033 ix86_expand_vec_set_builtin (tree exp)
35035 enum machine_mode tmode, mode1;
35036 tree arg0, arg1, arg2;
35037 int elt;
35038 rtx op0, op1, target;
35040 arg0 = CALL_EXPR_ARG (exp, 0);
35041 arg1 = CALL_EXPR_ARG (exp, 1);
35042 arg2 = CALL_EXPR_ARG (exp, 2);
35044 tmode = TYPE_MODE (TREE_TYPE (arg0));
35045 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35046 gcc_assert (VECTOR_MODE_P (tmode));
35048 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35049 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35050 elt = get_element_number (TREE_TYPE (arg0), arg2);
35052 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35053 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35055 op0 = force_reg (tmode, op0);
35056 op1 = force_reg (mode1, op1);
35058 /* OP0 is the source of these builtin functions and shouldn't be
35059 modified. Create a copy, use it and return it as target. */
35060 target = gen_reg_rtx (tmode);
35061 emit_move_insn (target, op0);
35062 ix86_expand_vector_set (true, target, op1, elt);
35064 return target;
35067 /* Expand an expression EXP that calls a built-in function,
35068 with result going to TARGET if that's convenient
35069 (and in mode MODE if that's convenient).
35070 SUBTARGET may be used as the target for computing one of EXP's operands.
35071 IGNORE is nonzero if the value is to be ignored. */
35073 static rtx
35074 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35075 enum machine_mode mode, int ignore)
35077 const struct builtin_description *d;
35078 size_t i;
35079 enum insn_code icode;
35080 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35081 tree arg0, arg1, arg2, arg3, arg4;
35082 rtx op0, op1, op2, op3, op4, pat, insn;
35083 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35084 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35086 /* For CPU builtins that can be folded, fold first and expand the fold. */
35087 switch (fcode)
35089 case IX86_BUILTIN_CPU_INIT:
35091 /* Make it call __cpu_indicator_init in libgcc. */
35092 tree call_expr, fndecl, type;
35093 type = build_function_type_list (integer_type_node, NULL_TREE);
35094 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35095 call_expr = build_call_expr (fndecl, 0);
35096 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35098 case IX86_BUILTIN_CPU_IS:
35099 case IX86_BUILTIN_CPU_SUPPORTS:
35101 tree arg0 = CALL_EXPR_ARG (exp, 0);
35102 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35103 gcc_assert (fold_expr != NULL_TREE);
35104 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35108 /* Determine whether the builtin function is available under the current ISA.
35109 Originally the builtin was not created if it wasn't applicable to the
35110 current ISA based on the command line switches. With function specific
35111 options, we need to check in the context of the function making the call
35112 whether it is supported. */
35113 if (ix86_builtins_isa[fcode].isa
35114 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35116 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35117 NULL, (enum fpmath_unit) 0, false);
35119 if (!opts)
35120 error ("%qE needs unknown isa option", fndecl);
35121 else
35123 gcc_assert (opts != NULL);
35124 error ("%qE needs isa option %s", fndecl, opts);
35125 free (opts);
35127 return const0_rtx;
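/* For example, using an AVX builtin such as __builtin_ia32_addpd256 in a
function compiled without -mavx (and without a target("avx") attribute)
lands here and produces a diagnostic along the lines of
"... needs isa option -mavx". */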
35130 switch (fcode)
35132 case IX86_BUILTIN_MASKMOVQ:
35133 case IX86_BUILTIN_MASKMOVDQU:
35134 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35135 ? CODE_FOR_mmx_maskmovq
35136 : CODE_FOR_sse2_maskmovdqu);
35137 /* Note the arg order is different from the operand order. */
35138 arg1 = CALL_EXPR_ARG (exp, 0);
35139 arg2 = CALL_EXPR_ARG (exp, 1);
35140 arg0 = CALL_EXPR_ARG (exp, 2);
35141 op0 = expand_normal (arg0);
35142 op1 = expand_normal (arg1);
35143 op2 = expand_normal (arg2);
35144 mode0 = insn_data[icode].operand[0].mode;
35145 mode1 = insn_data[icode].operand[1].mode;
35146 mode2 = insn_data[icode].operand[2].mode;
35148 op0 = ix86_zero_extend_to_Pmode (op0);
35149 op0 = gen_rtx_MEM (mode1, op0);
35151 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35152 op0 = copy_to_mode_reg (mode0, op0);
35153 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35154 op1 = copy_to_mode_reg (mode1, op1);
35155 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35156 op2 = copy_to_mode_reg (mode2, op2);
35157 pat = GEN_FCN (icode) (op0, op1, op2);
35158 if (! pat)
35159 return 0;
35160 emit_insn (pat);
35161 return 0;
35163 case IX86_BUILTIN_LDMXCSR:
35164 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35165 target = assign_386_stack_local (SImode, SLOT_TEMP);
35166 emit_move_insn (target, op0);
35167 emit_insn (gen_sse_ldmxcsr (target));
35168 return 0;
35170 case IX86_BUILTIN_STMXCSR:
35171 target = assign_386_stack_local (SImode, SLOT_TEMP);
35172 emit_insn (gen_sse_stmxcsr (target));
35173 return copy_to_mode_reg (SImode, target);
35175 case IX86_BUILTIN_CLFLUSH:
35176 arg0 = CALL_EXPR_ARG (exp, 0);
35177 op0 = expand_normal (arg0);
35178 icode = CODE_FOR_sse2_clflush;
35179 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35180 op0 = ix86_zero_extend_to_Pmode (op0);
35182 emit_insn (gen_sse2_clflush (op0));
35183 return 0;
35185 case IX86_BUILTIN_MONITOR:
35186 arg0 = CALL_EXPR_ARG (exp, 0);
35187 arg1 = CALL_EXPR_ARG (exp, 1);
35188 arg2 = CALL_EXPR_ARG (exp, 2);
35189 op0 = expand_normal (arg0);
35190 op1 = expand_normal (arg1);
35191 op2 = expand_normal (arg2);
35192 if (!REG_P (op0))
35193 op0 = ix86_zero_extend_to_Pmode (op0);
35194 if (!REG_P (op1))
35195 op1 = copy_to_mode_reg (SImode, op1);
35196 if (!REG_P (op2))
35197 op2 = copy_to_mode_reg (SImode, op2);
35198 emit_insn (ix86_gen_monitor (op0, op1, op2));
35199 return 0;
35201 case IX86_BUILTIN_MWAIT:
35202 arg0 = CALL_EXPR_ARG (exp, 0);
35203 arg1 = CALL_EXPR_ARG (exp, 1);
35204 op0 = expand_normal (arg0);
35205 op1 = expand_normal (arg1);
35206 if (!REG_P (op0))
35207 op0 = copy_to_mode_reg (SImode, op0);
35208 if (!REG_P (op1))
35209 op1 = copy_to_mode_reg (SImode, op1);
35210 emit_insn (gen_sse3_mwait (op0, op1));
35211 return 0;
35213 case IX86_BUILTIN_VEC_INIT_V2SI:
35214 case IX86_BUILTIN_VEC_INIT_V4HI:
35215 case IX86_BUILTIN_VEC_INIT_V8QI:
35216 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35218 case IX86_BUILTIN_VEC_EXT_V2DF:
35219 case IX86_BUILTIN_VEC_EXT_V2DI:
35220 case IX86_BUILTIN_VEC_EXT_V4SF:
35221 case IX86_BUILTIN_VEC_EXT_V4SI:
35222 case IX86_BUILTIN_VEC_EXT_V8HI:
35223 case IX86_BUILTIN_VEC_EXT_V2SI:
35224 case IX86_BUILTIN_VEC_EXT_V4HI:
35225 case IX86_BUILTIN_VEC_EXT_V16QI:
35226 return ix86_expand_vec_ext_builtin (exp, target);
35228 case IX86_BUILTIN_VEC_SET_V2DI:
35229 case IX86_BUILTIN_VEC_SET_V4SF:
35230 case IX86_BUILTIN_VEC_SET_V4SI:
35231 case IX86_BUILTIN_VEC_SET_V8HI:
35232 case IX86_BUILTIN_VEC_SET_V4HI:
35233 case IX86_BUILTIN_VEC_SET_V16QI:
35234 return ix86_expand_vec_set_builtin (exp);
35236 case IX86_BUILTIN_INFQ:
35237 case IX86_BUILTIN_HUGE_VALQ:
35239 REAL_VALUE_TYPE inf;
35240 rtx tmp;
35242 real_inf (&inf);
35243 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35245 tmp = validize_mem (force_const_mem (mode, tmp));
35247 if (target == 0)
35248 target = gen_reg_rtx (mode);
35250 emit_move_insn (target, tmp);
35251 return target;
35254 case IX86_BUILTIN_RDPMC:
35255 case IX86_BUILTIN_RDTSC:
35256 case IX86_BUILTIN_RDTSCP:
35258 op0 = gen_reg_rtx (DImode);
35259 op1 = gen_reg_rtx (DImode);
35261 if (fcode == IX86_BUILTIN_RDPMC)
35263 arg0 = CALL_EXPR_ARG (exp, 0);
35264 op2 = expand_normal (arg0);
35265 if (!register_operand (op2, SImode))
35266 op2 = copy_to_mode_reg (SImode, op2);
35268 insn = (TARGET_64BIT
35269 ? gen_rdpmc_rex64 (op0, op1, op2)
35270 : gen_rdpmc (op0, op2));
35271 emit_insn (insn);
35273 else if (fcode == IX86_BUILTIN_RDTSC)
35275 insn = (TARGET_64BIT
35276 ? gen_rdtsc_rex64 (op0, op1)
35277 : gen_rdtsc (op0));
35278 emit_insn (insn);
35280 else
35282 op2 = gen_reg_rtx (SImode);
35284 insn = (TARGET_64BIT
35285 ? gen_rdtscp_rex64 (op0, op1, op2)
35286 : gen_rdtscp (op0, op2));
35287 emit_insn (insn);
35289 arg0 = CALL_EXPR_ARG (exp, 0);
35290 op4 = expand_normal (arg0);
35291 if (!address_operand (op4, VOIDmode))
35293 op4 = convert_memory_address (Pmode, op4);
35294 op4 = copy_addr_to_reg (op4);
35296 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35299 if (target == 0)
35301 /* mode is VOIDmode if __builtin_rd* has been called
35302 without lhs. */
35303 if (mode == VOIDmode)
35304 return target;
35305 target = gen_reg_rtx (mode);
35308 if (TARGET_64BIT)
35310 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35311 op1, 1, OPTAB_DIRECT);
35312 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35313 op0, 1, OPTAB_DIRECT);
35316 emit_move_insn (target, op0);
35317 return target;
35319 case IX86_BUILTIN_FXSAVE:
35320 case IX86_BUILTIN_FXRSTOR:
35321 case IX86_BUILTIN_FXSAVE64:
35322 case IX86_BUILTIN_FXRSTOR64:
35323 case IX86_BUILTIN_FNSTENV:
35324 case IX86_BUILTIN_FLDENV:
35325 case IX86_BUILTIN_FNSTSW:
35326 mode0 = BLKmode;
35327 switch (fcode)
35329 case IX86_BUILTIN_FXSAVE:
35330 icode = CODE_FOR_fxsave;
35331 break;
35332 case IX86_BUILTIN_FXRSTOR:
35333 icode = CODE_FOR_fxrstor;
35334 break;
35335 case IX86_BUILTIN_FXSAVE64:
35336 icode = CODE_FOR_fxsave64;
35337 break;
35338 case IX86_BUILTIN_FXRSTOR64:
35339 icode = CODE_FOR_fxrstor64;
35340 break;
35341 case IX86_BUILTIN_FNSTENV:
35342 icode = CODE_FOR_fnstenv;
35343 break;
35344 case IX86_BUILTIN_FLDENV:
35345 icode = CODE_FOR_fldenv;
35346 break;
35347 case IX86_BUILTIN_FNSTSW:
35348 icode = CODE_FOR_fnstsw;
35349 mode0 = HImode;
35350 break;
35351 default:
35352 gcc_unreachable ();
35355 arg0 = CALL_EXPR_ARG (exp, 0);
35356 op0 = expand_normal (arg0);
35358 if (!address_operand (op0, VOIDmode))
35360 op0 = convert_memory_address (Pmode, op0);
35361 op0 = copy_addr_to_reg (op0);
35363 op0 = gen_rtx_MEM (mode0, op0);
35365 pat = GEN_FCN (icode) (op0);
35366 if (pat)
35367 emit_insn (pat);
35368 return 0;
35370 case IX86_BUILTIN_XSAVE:
35371 case IX86_BUILTIN_XRSTOR:
35372 case IX86_BUILTIN_XSAVE64:
35373 case IX86_BUILTIN_XRSTOR64:
35374 case IX86_BUILTIN_XSAVEOPT:
35375 case IX86_BUILTIN_XSAVEOPT64:
35376 arg0 = CALL_EXPR_ARG (exp, 0);
35377 arg1 = CALL_EXPR_ARG (exp, 1);
35378 op0 = expand_normal (arg0);
35379 op1 = expand_normal (arg1);
35381 if (!address_operand (op0, VOIDmode))
35383 op0 = convert_memory_address (Pmode, op0);
35384 op0 = copy_addr_to_reg (op0);
35386 op0 = gen_rtx_MEM (BLKmode, op0);
35388 op1 = force_reg (DImode, op1);
35390 if (TARGET_64BIT)
35392 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35393 NULL, 1, OPTAB_DIRECT);
35394 switch (fcode)
35396 case IX86_BUILTIN_XSAVE:
35397 icode = CODE_FOR_xsave_rex64;
35398 break;
35399 case IX86_BUILTIN_XRSTOR:
35400 icode = CODE_FOR_xrstor_rex64;
35401 break;
35402 case IX86_BUILTIN_XSAVE64:
35403 icode = CODE_FOR_xsave64;
35404 break;
35405 case IX86_BUILTIN_XRSTOR64:
35406 icode = CODE_FOR_xrstor64;
35407 break;
35408 case IX86_BUILTIN_XSAVEOPT:
35409 icode = CODE_FOR_xsaveopt_rex64;
35410 break;
35411 case IX86_BUILTIN_XSAVEOPT64:
35412 icode = CODE_FOR_xsaveopt64;
35413 break;
35414 default:
35415 gcc_unreachable ();
35418 op2 = gen_lowpart (SImode, op2);
35419 op1 = gen_lowpart (SImode, op1);
35420 pat = GEN_FCN (icode) (op0, op1, op2);
35422 else
35424 switch (fcode)
35426 case IX86_BUILTIN_XSAVE:
35427 icode = CODE_FOR_xsave;
35428 break;
35429 case IX86_BUILTIN_XRSTOR:
35430 icode = CODE_FOR_xrstor;
35431 break;
35432 case IX86_BUILTIN_XSAVEOPT:
35433 icode = CODE_FOR_xsaveopt;
35434 break;
35435 default:
35436 gcc_unreachable ();
35438 pat = GEN_FCN (icode) (op0, op1);
35441 if (pat)
35442 emit_insn (pat);
35443 return 0;
35445 case IX86_BUILTIN_LLWPCB:
35446 arg0 = CALL_EXPR_ARG (exp, 0);
35447 op0 = expand_normal (arg0);
35448 icode = CODE_FOR_lwp_llwpcb;
35449 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35450 op0 = ix86_zero_extend_to_Pmode (op0);
35451 emit_insn (gen_lwp_llwpcb (op0));
35452 return 0;
35454 case IX86_BUILTIN_SLWPCB:
35455 icode = CODE_FOR_lwp_slwpcb;
35456 if (!target
35457 || !insn_data[icode].operand[0].predicate (target, Pmode))
35458 target = gen_reg_rtx (Pmode);
35459 emit_insn (gen_lwp_slwpcb (target));
35460 return target;
35462 case IX86_BUILTIN_BEXTRI32:
35463 case IX86_BUILTIN_BEXTRI64:
35464 arg0 = CALL_EXPR_ARG (exp, 0);
35465 arg1 = CALL_EXPR_ARG (exp, 1);
35466 op0 = expand_normal (arg0);
35467 op1 = expand_normal (arg1);
35468 icode = (fcode == IX86_BUILTIN_BEXTRI32
35469 ? CODE_FOR_tbm_bextri_si
35470 : CODE_FOR_tbm_bextri_di);
35471 if (!CONST_INT_P (op1))
35473 error ("last argument must be an immediate");
35474 return const0_rtx;
35476 else
35478 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35479 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35480 op1 = GEN_INT (length);
35481 op2 = GEN_INT (lsb_index);
35482 pat = GEN_FCN (icode) (target, op0, op1, op2);
35483 if (pat)
35484 emit_insn (pat);
35485 return target;
35488 case IX86_BUILTIN_RDRAND16_STEP:
35489 icode = CODE_FOR_rdrandhi_1;
35490 mode0 = HImode;
35491 goto rdrand_step;
35493 case IX86_BUILTIN_RDRAND32_STEP:
35494 icode = CODE_FOR_rdrandsi_1;
35495 mode0 = SImode;
35496 goto rdrand_step;
35498 case IX86_BUILTIN_RDRAND64_STEP:
35499 icode = CODE_FOR_rdranddi_1;
35500 mode0 = DImode;
35502 rdrand_step:
35503 op0 = gen_reg_rtx (mode0);
35504 emit_insn (GEN_FCN (icode) (op0));
35506 arg0 = CALL_EXPR_ARG (exp, 0);
35507 op1 = expand_normal (arg0);
35508 if (!address_operand (op1, VOIDmode))
35510 op1 = convert_memory_address (Pmode, op1);
35511 op1 = copy_addr_to_reg (op1);
35513 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35515 op1 = gen_reg_rtx (SImode);
35516 emit_move_insn (op1, CONST1_RTX (SImode));
35518 /* Emit SImode conditional move. */
35519 if (mode0 == HImode)
35521 op2 = gen_reg_rtx (SImode);
35522 emit_insn (gen_zero_extendhisi2 (op2, op0));
35524 else if (mode0 == SImode)
35525 op2 = op0;
35526 else
35527 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35529 if (target == 0
35530 || !register_operand (target, SImode))
35531 target = gen_reg_rtx (SImode);
35533 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35534 const0_rtx);
35535 emit_insn (gen_rtx_SET (VOIDmode, target,
35536 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35537 return target;
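/* User-level view (intrinsic from the usual immintrin.h): for
int ok = _rdrand32_step (&r);
the hardware value is stored through the pointer and the CCC-based
conditional move above turns the carry flag into the 0/1 return value:
1 when a random number was available, 0 otherwise (the hardware zeroes the
destination register in that case). */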
35539 case IX86_BUILTIN_RDSEED16_STEP:
35540 icode = CODE_FOR_rdseedhi_1;
35541 mode0 = HImode;
35542 goto rdseed_step;
35544 case IX86_BUILTIN_RDSEED32_STEP:
35545 icode = CODE_FOR_rdseedsi_1;
35546 mode0 = SImode;
35547 goto rdseed_step;
35549 case IX86_BUILTIN_RDSEED64_STEP:
35550 icode = CODE_FOR_rdseeddi_1;
35551 mode0 = DImode;
35553 rdseed_step:
35554 op0 = gen_reg_rtx (mode0);
35555 emit_insn (GEN_FCN (icode) (op0));
35557 arg0 = CALL_EXPR_ARG (exp, 0);
35558 op1 = expand_normal (arg0);
35559 if (!address_operand (op1, VOIDmode))
35561 op1 = convert_memory_address (Pmode, op1);
35562 op1 = copy_addr_to_reg (op1);
35564 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35566 op2 = gen_reg_rtx (QImode);
35568 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35569 const0_rtx);
35570 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35572 if (target == 0
35573 || !register_operand (target, SImode))
35574 target = gen_reg_rtx (SImode);
35576 emit_insn (gen_zero_extendqisi2 (target, op2));
35577 return target;
35579 case IX86_BUILTIN_ADDCARRYX32:
35580 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35581 mode0 = SImode;
35582 goto addcarryx;
35584 case IX86_BUILTIN_ADDCARRYX64:
35585 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35586 mode0 = DImode;
35588 addcarryx:
35589 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35590 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35591 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35592 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35594 op0 = gen_reg_rtx (QImode);
35596 /* Generate CF from input operand. */
35597 op1 = expand_normal (arg0);
35598 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35599 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35601 /* Gen ADCX instruction to compute X+Y+CF. */
35602 op2 = expand_normal (arg1);
35603 op3 = expand_normal (arg2);
35605 if (!REG_P (op2))
35606 op2 = copy_to_mode_reg (mode0, op2);
35607 if (!REG_P (op3))
35608 op3 = copy_to_mode_reg (mode0, op3);
35610 op0 = gen_reg_rtx (mode0);
35612 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35613 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35614 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35616 /* Store the result. */
35617 op4 = expand_normal (arg3);
35618 if (!address_operand (op4, VOIDmode))
35620 op4 = convert_memory_address (Pmode, op4);
35621 op4 = copy_addr_to_reg (op4);
35623 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35625 /* Return current CF value. */
35626 if (target == 0)
35627 target = gen_reg_rtx (QImode);
35629 PUT_MODE (pat, QImode);
35630 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35631 return target;
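/* User-level sketch (intrinsic name per adxintrin.h):
unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);
The incoming carry is materialised in CF by the addqi3_cc trick above, the
adcx pattern (or the plain add-with-carry pattern when ADX is not enabled)
computes a + b + CF, the sum is stored through the pointer, and the value
returned is the new carry read back from CF. */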
35633 case IX86_BUILTIN_READ_FLAGS:
35634 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35636 if (optimize
35637 || target == NULL_RTX
35638 || !nonimmediate_operand (target, word_mode)
35639 || GET_MODE (target) != word_mode)
35640 target = gen_reg_rtx (word_mode);
35642 emit_insn (gen_pop (target));
35643 return target;
35645 case IX86_BUILTIN_WRITE_FLAGS:
35647 arg0 = CALL_EXPR_ARG (exp, 0);
35648 op0 = expand_normal (arg0);
35649 if (!general_no_elim_operand (op0, word_mode))
35650 op0 = copy_to_mode_reg (word_mode, op0);
35652 emit_insn (gen_push (op0));
35653 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35654 return 0;
35656 case IX86_BUILTIN_KORTESTC16:
35657 icode = CODE_FOR_kortestchi;
35658 mode0 = HImode;
35659 mode1 = CCCmode;
35660 goto kortest;
35662 case IX86_BUILTIN_KORTESTZ16:
35663 icode = CODE_FOR_kortestzhi;
35664 mode0 = HImode;
35665 mode1 = CCZmode;
35667 kortest:
35668 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35669 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35670 op0 = expand_normal (arg0);
35671 op1 = expand_normal (arg1);
35673 op0 = copy_to_reg (op0);
35674 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35675 op1 = copy_to_reg (op1);
35676 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35678 target = gen_reg_rtx (QImode);
35679 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35681 /* Emit kortest. */
35682 emit_insn (GEN_FCN (icode) (op0, op1));
35683 /* And use setcc to return result from flags. */
35684 ix86_expand_setcc (target, EQ,
35685 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35686 return target;
35688 case IX86_BUILTIN_GATHERSIV2DF:
35689 icode = CODE_FOR_avx2_gathersiv2df;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERSIV4DF:
35692 icode = CODE_FOR_avx2_gathersiv4df;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERDIV2DF:
35695 icode = CODE_FOR_avx2_gatherdiv2df;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERDIV4DF:
35698 icode = CODE_FOR_avx2_gatherdiv4df;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERSIV4SF:
35701 icode = CODE_FOR_avx2_gathersiv4sf;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERSIV8SF:
35704 icode = CODE_FOR_avx2_gathersiv8sf;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERDIV4SF:
35707 icode = CODE_FOR_avx2_gatherdiv4sf;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERDIV8SF:
35710 icode = CODE_FOR_avx2_gatherdiv8sf;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERSIV2DI:
35713 icode = CODE_FOR_avx2_gathersiv2di;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERSIV4DI:
35716 icode = CODE_FOR_avx2_gathersiv4di;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERDIV2DI:
35719 icode = CODE_FOR_avx2_gatherdiv2di;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERDIV4DI:
35722 icode = CODE_FOR_avx2_gatherdiv4di;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERSIV4SI:
35725 icode = CODE_FOR_avx2_gathersiv4si;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERSIV8SI:
35728 icode = CODE_FOR_avx2_gathersiv8si;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERDIV4SI:
35731 icode = CODE_FOR_avx2_gatherdiv4si;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHERDIV8SI:
35734 icode = CODE_FOR_avx2_gatherdiv8si;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHERALTSIV4DF:
35737 icode = CODE_FOR_avx2_gathersiv4df;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHERALTDIV8SF:
35740 icode = CODE_FOR_avx2_gatherdiv8sf;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHERALTSIV4DI:
35743 icode = CODE_FOR_avx2_gathersiv4di;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHERALTDIV8SI:
35746 icode = CODE_FOR_avx2_gatherdiv8si;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3SIV16SF:
35749 icode = CODE_FOR_avx512f_gathersiv16sf;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3SIV8DF:
35752 icode = CODE_FOR_avx512f_gathersiv8df;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3DIV16SF:
35755 icode = CODE_FOR_avx512f_gatherdiv16sf;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3DIV8DF:
35758 icode = CODE_FOR_avx512f_gatherdiv8df;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3SIV16SI:
35761 icode = CODE_FOR_avx512f_gathersiv16si;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3SIV8DI:
35764 icode = CODE_FOR_avx512f_gathersiv8di;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3DIV16SI:
35767 icode = CODE_FOR_avx512f_gatherdiv16si;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHER3DIV8DI:
35770 icode = CODE_FOR_avx512f_gatherdiv8di;
35771 goto gather_gen;
35772 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35773 icode = CODE_FOR_avx512f_gathersiv8df;
35774 goto gather_gen;
35775 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35776 icode = CODE_FOR_avx512f_gatherdiv16sf;
35777 goto gather_gen;
35778 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35779 icode = CODE_FOR_avx512f_gathersiv8di;
35780 goto gather_gen;
35781 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35782 icode = CODE_FOR_avx512f_gatherdiv16si;
35783 goto gather_gen;
35784 case IX86_BUILTIN_SCATTERSIV16SF:
35785 icode = CODE_FOR_avx512f_scattersiv16sf;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERSIV8DF:
35788 icode = CODE_FOR_avx512f_scattersiv8df;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERDIV16SF:
35791 icode = CODE_FOR_avx512f_scatterdiv16sf;
35792 goto scatter_gen;
35793 case IX86_BUILTIN_SCATTERDIV8DF:
35794 icode = CODE_FOR_avx512f_scatterdiv8df;
35795 goto scatter_gen;
35796 case IX86_BUILTIN_SCATTERSIV16SI:
35797 icode = CODE_FOR_avx512f_scattersiv16si;
35798 goto scatter_gen;
35799 case IX86_BUILTIN_SCATTERSIV8DI:
35800 icode = CODE_FOR_avx512f_scattersiv8di;
35801 goto scatter_gen;
35802 case IX86_BUILTIN_SCATTERDIV16SI:
35803 icode = CODE_FOR_avx512f_scatterdiv16si;
35804 goto scatter_gen;
35805 case IX86_BUILTIN_SCATTERDIV8DI:
35806 icode = CODE_FOR_avx512f_scatterdiv8di;
35807 goto scatter_gen;
35809 case IX86_BUILTIN_GATHERPFDPD:
35810 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_GATHERPFDPS:
35813 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_GATHERPFQPD:
35816 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35817 goto vec_prefetch_gen;
35818 case IX86_BUILTIN_GATHERPFQPS:
35819 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35820 goto vec_prefetch_gen;
35821 case IX86_BUILTIN_SCATTERPFDPD:
35822 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35823 goto vec_prefetch_gen;
35824 case IX86_BUILTIN_SCATTERPFDPS:
35825 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35826 goto vec_prefetch_gen;
35827 case IX86_BUILTIN_SCATTERPFQPD:
35828 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35829 goto vec_prefetch_gen;
35830 case IX86_BUILTIN_SCATTERPFQPS:
35831 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35832 goto vec_prefetch_gen;
35834 gather_gen:
35835 rtx half;
35836 rtx (*gen) (rtx, rtx);
35838 arg0 = CALL_EXPR_ARG (exp, 0);
35839 arg1 = CALL_EXPR_ARG (exp, 1);
35840 arg2 = CALL_EXPR_ARG (exp, 2);
35841 arg3 = CALL_EXPR_ARG (exp, 3);
35842 arg4 = CALL_EXPR_ARG (exp, 4);
35843 op0 = expand_normal (arg0);
35844 op1 = expand_normal (arg1);
35845 op2 = expand_normal (arg2);
35846 op3 = expand_normal (arg3);
35847 op4 = expand_normal (arg4);
35848 /* Note the arg order is different from the operand order. */
35849 mode0 = insn_data[icode].operand[1].mode;
35850 mode2 = insn_data[icode].operand[3].mode;
35851 mode3 = insn_data[icode].operand[4].mode;
35852 mode4 = insn_data[icode].operand[5].mode;
35854 if (target == NULL_RTX
35855 || GET_MODE (target) != insn_data[icode].operand[0].mode
35856 || !insn_data[icode].operand[0].predicate (target,
35857 GET_MODE (target)))
35858 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35859 else
35860 subtarget = target;
35862 switch (fcode)
35864 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35865 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35866 half = gen_reg_rtx (V8SImode);
35867 if (!nonimmediate_operand (op2, V16SImode))
35868 op2 = copy_to_mode_reg (V16SImode, op2);
35869 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35870 op2 = half;
35871 break;
35872 case IX86_BUILTIN_GATHERALTSIV4DF:
35873 case IX86_BUILTIN_GATHERALTSIV4DI:
35874 half = gen_reg_rtx (V4SImode);
35875 if (!nonimmediate_operand (op2, V8SImode))
35876 op2 = copy_to_mode_reg (V8SImode, op2);
35877 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35878 op2 = half;
35879 break;
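/* The *ALT* gather variants exist for the vectorizer, which may have an
index vector twice as wide as the data: e.g. IX86_BUILTIN_GATHERALTSIV4DF
gathers four doubles but is handed a V8SI index, so only the low V4SI half
extracted above is actually used by the avx2_gathersiv4df pattern. */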
35880 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35881 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35882 half = gen_reg_rtx (mode0);
35883 if (mode0 == V8SFmode)
35884 gen = gen_vec_extract_lo_v16sf;
35885 else
35886 gen = gen_vec_extract_lo_v16si;
35887 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35888 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35889 emit_insn (gen (half, op0));
35890 op0 = half;
35891 if (GET_MODE (op3) != VOIDmode)
35893 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35894 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35895 emit_insn (gen (half, op3));
35896 op3 = half;
35898 break;
35899 case IX86_BUILTIN_GATHERALTDIV8SF:
35900 case IX86_BUILTIN_GATHERALTDIV8SI:
35901 half = gen_reg_rtx (mode0);
35902 if (mode0 == V4SFmode)
35903 gen = gen_vec_extract_lo_v8sf;
35904 else
35905 gen = gen_vec_extract_lo_v8si;
35906 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35907 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35908 emit_insn (gen (half, op0));
35909 op0 = half;
35910 if (GET_MODE (op3) != VOIDmode)
35912 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35913 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35914 emit_insn (gen (half, op3));
35915 op3 = half;
35917 break;
35918 default:
35919 break;
35922 /* Force memory operand only with base register here. But we
35923 don't want to do it on memory operand for other builtin
35924 functions. */
35925 op1 = ix86_zero_extend_to_Pmode (op1);
35927 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35928 op0 = copy_to_mode_reg (mode0, op0);
35929 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35930 op1 = copy_to_mode_reg (Pmode, op1);
35931 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35932 op2 = copy_to_mode_reg (mode2, op2);
35934 op3 = fixup_modeless_constant (op3, mode3);
35936 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35938 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35939 op3 = copy_to_mode_reg (mode3, op3);
35941 else
35943 op3 = copy_to_reg (op3);
35944 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35946 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35948 error ("the last argument must be scale 1, 2, 4, 8");
35949 return const0_rtx;
35952 /* Optimize. If mask is known to have all high bits set,
35953 replace op0 with pc_rtx to signal that the instruction
35954 overwrites the whole destination and doesn't use its
35955 previous contents. */
35956 if (optimize)
35958 if (TREE_CODE (arg3) == INTEGER_CST)
35960 if (integer_all_onesp (arg3))
35961 op0 = pc_rtx;
35963 else if (TREE_CODE (arg3) == VECTOR_CST)
35965 unsigned int negative = 0;
35966 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35968 tree cst = VECTOR_CST_ELT (arg3, i);
35969 if (TREE_CODE (cst) == INTEGER_CST
35970 && tree_int_cst_sign_bit (cst))
35971 negative++;
35972 else if (TREE_CODE (cst) == REAL_CST
35973 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35974 negative++;
35976 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35977 op0 = pc_rtx;
35979 else if (TREE_CODE (arg3) == SSA_NAME
35980 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35982 /* Recognize also when mask is like:
35983 __v2df src = _mm_setzero_pd ();
35984 __v2df mask = _mm_cmpeq_pd (src, src);
35986 __v8sf src = _mm256_setzero_ps ();
35987 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35988 as that is a cheaper way to load all ones into
35989 a register than having to load a constant from
35990 memory. */
35991 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35992 if (is_gimple_call (def_stmt))
35994 tree fndecl = gimple_call_fndecl (def_stmt);
35995 if (fndecl
35996 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35997 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35999 case IX86_BUILTIN_CMPPD:
36000 case IX86_BUILTIN_CMPPS:
36001 case IX86_BUILTIN_CMPPD256:
36002 case IX86_BUILTIN_CMPPS256:
36003 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36004 break;
36005 /* FALLTHRU */
36006 case IX86_BUILTIN_CMPEQPD:
36007 case IX86_BUILTIN_CMPEQPS:
36008 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36009 && initializer_zerop (gimple_call_arg (def_stmt,
36010 1)))
36011 op0 = pc_rtx;
36012 break;
36013 default:
36014 break;
36020 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36021 if (! pat)
36022 return const0_rtx;
36023 emit_insn (pat);
36025 switch (fcode)
36027 case IX86_BUILTIN_GATHER3DIV16SF:
36028 if (target == NULL_RTX)
36029 target = gen_reg_rtx (V8SFmode);
36030 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36031 break;
36032 case IX86_BUILTIN_GATHER3DIV16SI:
36033 if (target == NULL_RTX)
36034 target = gen_reg_rtx (V8SImode);
36035 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36036 break;
36037 case IX86_BUILTIN_GATHERDIV8SF:
36038 if (target == NULL_RTX)
36039 target = gen_reg_rtx (V4SFmode);
36040 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36041 break;
36042 case IX86_BUILTIN_GATHERDIV8SI:
36043 if (target == NULL_RTX)
36044 target = gen_reg_rtx (V4SImode);
36045 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36046 break;
36047 default:
36048 target = subtarget;
36049 break;
36051 return target;
36053 scatter_gen:
36054 arg0 = CALL_EXPR_ARG (exp, 0);
36055 arg1 = CALL_EXPR_ARG (exp, 1);
36056 arg2 = CALL_EXPR_ARG (exp, 2);
36057 arg3 = CALL_EXPR_ARG (exp, 3);
36058 arg4 = CALL_EXPR_ARG (exp, 4);
36059 op0 = expand_normal (arg0);
36060 op1 = expand_normal (arg1);
36061 op2 = expand_normal (arg2);
36062 op3 = expand_normal (arg3);
36063 op4 = expand_normal (arg4);
36064 mode1 = insn_data[icode].operand[1].mode;
36065 mode2 = insn_data[icode].operand[2].mode;
36066 mode3 = insn_data[icode].operand[3].mode;
36067 mode4 = insn_data[icode].operand[4].mode;
36069 /* Force memory operand only with base register here. But we
36070 don't want to do it on memory operand for other builtin
36071 functions. */
36072 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36074 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36075 op0 = copy_to_mode_reg (Pmode, op0);
36077 op1 = fixup_modeless_constant (op1, mode1);
36079 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36081 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36082 op1 = copy_to_mode_reg (mode1, op1);
36084 else
36086 op1 = copy_to_reg (op1);
36087 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36090 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36091 op2 = copy_to_mode_reg (mode2, op2);
36093 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36094 op3 = copy_to_mode_reg (mode3, op3);
36096 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36098 error ("the last argument must be scale 1, 2, 4, 8");
36099 return const0_rtx;
36102 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36103 if (! pat)
36104 return const0_rtx;
36106 emit_insn (pat);
36107 return 0;
36109 vec_prefetch_gen:
36110 arg0 = CALL_EXPR_ARG (exp, 0);
36111 arg1 = CALL_EXPR_ARG (exp, 1);
36112 arg2 = CALL_EXPR_ARG (exp, 2);
36113 arg3 = CALL_EXPR_ARG (exp, 3);
36114 arg4 = CALL_EXPR_ARG (exp, 4);
36115 op0 = expand_normal (arg0);
36116 op1 = expand_normal (arg1);
36117 op2 = expand_normal (arg2);
36118 op3 = expand_normal (arg3);
36119 op4 = expand_normal (arg4);
36120 mode0 = insn_data[icode].operand[0].mode;
36121 mode1 = insn_data[icode].operand[1].mode;
36122 mode3 = insn_data[icode].operand[3].mode;
36123 mode4 = insn_data[icode].operand[4].mode;
36125 op0 = fixup_modeless_constant (op0, mode0);
36127 if (GET_MODE (op0) == mode0
36128 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36130 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36131 op0 = copy_to_mode_reg (mode0, op0);
36133 else if (op0 != constm1_rtx)
36135 op0 = copy_to_reg (op0);
36136 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36139 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36140 op1 = copy_to_mode_reg (mode1, op1);
36142 /* Force memory operand only with base register here. But we
36143 don't want to do it on memory operand for other builtin
36144 functions. */
36145 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36147 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36148 op2 = copy_to_mode_reg (Pmode, op2);
36150 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36152 error ("the fourth argument must be scale 1, 2, 4, 8");
36153 return const0_rtx;
36156 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36158 error ("incorrect hint operand");
36159 return const0_rtx;
36162 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36163 if (! pat)
36164 return const0_rtx;
36166 emit_insn (pat);
36168 return 0;
36170 case IX86_BUILTIN_XABORT:
36171 icode = CODE_FOR_xabort;
36172 arg0 = CALL_EXPR_ARG (exp, 0);
36173 op0 = expand_normal (arg0);
36174 mode0 = insn_data[icode].operand[0].mode;
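/* xabort takes an 8-bit compile-time constant, e.g. _xabort (0xff);
   a variable status argument is rejected below.  */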
36175 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36177 error ("the xabort's argument must be an 8-bit immediate");
36178 return const0_rtx;
36180 emit_insn (gen_xabort (op0));
36181 return 0;
36183 default:
36184 break;
36187 for (i = 0, d = bdesc_special_args;
36188 i < ARRAY_SIZE (bdesc_special_args);
36189 i++, d++)
36190 if (d->code == fcode)
36191 return ix86_expand_special_args_builtin (d, exp, target);
36193 for (i = 0, d = bdesc_args;
36194 i < ARRAY_SIZE (bdesc_args);
36195 i++, d++)
36196 if (d->code == fcode)
36197 switch (fcode)
36199 case IX86_BUILTIN_FABSQ:
36200 case IX86_BUILTIN_COPYSIGNQ:
36201 if (!TARGET_SSE)
36202 /* Emit a normal call if SSE isn't available. */
36203 return expand_call (exp, target, ignore);
36204 default:
36205 return ix86_expand_args_builtin (d, exp, target);
36208 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36209 if (d->code == fcode)
36210 return ix86_expand_sse_comi (d, exp, target);
36212 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36213 if (d->code == fcode)
36214 return ix86_expand_round_builtin (d, exp, target);
36216 for (i = 0, d = bdesc_pcmpestr;
36217 i < ARRAY_SIZE (bdesc_pcmpestr);
36218 i++, d++)
36219 if (d->code == fcode)
36220 return ix86_expand_sse_pcmpestr (d, exp, target);
36222 for (i = 0, d = bdesc_pcmpistr;
36223 i < ARRAY_SIZE (bdesc_pcmpistr);
36224 i++, d++)
36225 if (d->code == fcode)
36226 return ix86_expand_sse_pcmpistr (d, exp, target);
36228 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36229 if (d->code == fcode)
36230 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36231 (enum ix86_builtin_func_type)
36232 d->flag, d->comparison);
36234 gcc_unreachable ();
36237 /* This returns the target-specific builtin with code CODE if
36238 current_function_decl has visibility on this builtin, which is checked
36239 using isa flags. Returns NULL_TREE otherwise. */
36241 static tree ix86_get_builtin (enum ix86_builtins code)
36243 struct cl_target_option *opts;
36244 tree target_tree = NULL_TREE;
36246 /* Determine the isa flags of current_function_decl. */
36248 if (current_function_decl)
36249 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36251 if (target_tree == NULL)
36252 target_tree = target_option_default_node;
36254 opts = TREE_TARGET_OPTION (target_tree);
36256 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36257 return ix86_builtin_decl (code, true);
36258 else
36259 return NULL_TREE;
36262 /* Returns a function decl for a vectorized version of the builtin function
36263 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36264 if it is not available. */
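/* For example, when the vectorizer asks for a V2DF variant of
   __builtin_sqrt (type_in == type_out == V2DF), this returns the decl of
   IX86_BUILTIN_SQRTPD, i.e. the sqrtpd pattern, provided the current
   function's ISA flags include SSE2.  */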
36266 static tree
36267 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36268 tree type_in)
36270 enum machine_mode in_mode, out_mode;
36271 int in_n, out_n;
36272 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36274 if (TREE_CODE (type_out) != VECTOR_TYPE
36275 || TREE_CODE (type_in) != VECTOR_TYPE
36276 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36277 return NULL_TREE;
36279 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36280 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36281 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36282 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36284 switch (fn)
36286 case BUILT_IN_SQRT:
36287 if (out_mode == DFmode && in_mode == DFmode)
36289 if (out_n == 2 && in_n == 2)
36290 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36291 else if (out_n == 4 && in_n == 4)
36292 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36293 else if (out_n == 8 && in_n == 8)
36294 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36296 break;
36298 case BUILT_IN_EXP2F:
36299 if (out_mode == SFmode && in_mode == SFmode)
36301 if (out_n == 16 && in_n == 16)
36302 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36304 break;
36306 case BUILT_IN_SQRTF:
36307 if (out_mode == SFmode && in_mode == SFmode)
36309 if (out_n == 4 && in_n == 4)
36310 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36311 else if (out_n == 8 && in_n == 8)
36312 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36313 else if (out_n == 16 && in_n == 16)
36314 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36316 break;
36318 case BUILT_IN_IFLOOR:
36319 case BUILT_IN_LFLOOR:
36320 case BUILT_IN_LLFLOOR:
36321 /* The round insn does not trap on denormals. */
36322 if (flag_trapping_math || !TARGET_ROUND)
36323 break;
36325 if (out_mode == SImode && in_mode == DFmode)
36327 if (out_n == 4 && in_n == 2)
36328 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36329 else if (out_n == 8 && in_n == 4)
36330 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36331 else if (out_n == 16 && in_n == 8)
36332 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36334 break;
36336 case BUILT_IN_IFLOORF:
36337 case BUILT_IN_LFLOORF:
36338 case BUILT_IN_LLFLOORF:
36339 /* The round insn does not trap on denormals. */
36340 if (flag_trapping_math || !TARGET_ROUND)
36341 break;
36343 if (out_mode == SImode && in_mode == SFmode)
36345 if (out_n == 4 && in_n == 4)
36346 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36347 else if (out_n == 8 && in_n == 8)
36348 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36350 break;
36352 case BUILT_IN_ICEIL:
36353 case BUILT_IN_LCEIL:
36354 case BUILT_IN_LLCEIL:
36355 /* The round insn does not trap on denormals. */
36356 if (flag_trapping_math || !TARGET_ROUND)
36357 break;
36359 if (out_mode == SImode && in_mode == DFmode)
36361 if (out_n == 4 && in_n == 2)
36362 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36363 else if (out_n == 8 && in_n == 4)
36364 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36365 else if (out_n == 16 && in_n == 8)
36366 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36368 break;
36370 case BUILT_IN_ICEILF:
36371 case BUILT_IN_LCEILF:
36372 case BUILT_IN_LLCEILF:
36373 /* The round insn does not trap on denormals. */
36374 if (flag_trapping_math || !TARGET_ROUND)
36375 break;
36377 if (out_mode == SImode && in_mode == SFmode)
36379 if (out_n == 4 && in_n == 4)
36380 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36381 else if (out_n == 8 && in_n == 8)
36382 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36384 break;
36386 case BUILT_IN_IRINT:
36387 case BUILT_IN_LRINT:
36388 case BUILT_IN_LLRINT:
36389 if (out_mode == SImode && in_mode == DFmode)
36391 if (out_n == 4 && in_n == 2)
36392 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36393 else if (out_n == 8 && in_n == 4)
36394 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36396 break;
36398 case BUILT_IN_IRINTF:
36399 case BUILT_IN_LRINTF:
36400 case BUILT_IN_LLRINTF:
36401 if (out_mode == SImode && in_mode == SFmode)
36403 if (out_n == 4 && in_n == 4)
36404 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36405 else if (out_n == 8 && in_n == 8)
36406 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36408 break;
36410 case BUILT_IN_IROUND:
36411 case BUILT_IN_LROUND:
36412 case BUILT_IN_LLROUND:
36413 /* The round insn does not trap on denormals. */
36414 if (flag_trapping_math || !TARGET_ROUND)
36415 break;
36417 if (out_mode == SImode && in_mode == DFmode)
36419 if (out_n == 4 && in_n == 2)
36420 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36421 else if (out_n == 8 && in_n == 4)
36422 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36423 else if (out_n == 16 && in_n == 8)
36424 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36426 break;
36428 case BUILT_IN_IROUNDF:
36429 case BUILT_IN_LROUNDF:
36430 case BUILT_IN_LLROUNDF:
36431 /* The round insn does not trap on denormals. */
36432 if (flag_trapping_math || !TARGET_ROUND)
36433 break;
36435 if (out_mode == SImode && in_mode == SFmode)
36437 if (out_n == 4 && in_n == 4)
36438 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36439 else if (out_n == 8 && in_n == 8)
36440 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36442 break;
36444 case BUILT_IN_COPYSIGN:
36445 if (out_mode == DFmode && in_mode == DFmode)
36447 if (out_n == 2 && in_n == 2)
36448 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36449 else if (out_n == 4 && in_n == 4)
36450 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36451 else if (out_n == 8 && in_n == 8)
36452 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36454 break;
36456 case BUILT_IN_COPYSIGNF:
36457 if (out_mode == SFmode && in_mode == SFmode)
36459 if (out_n == 4 && in_n == 4)
36460 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36461 else if (out_n == 8 && in_n == 8)
36462 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36463 else if (out_n == 16 && in_n == 16)
36464 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36466 break;
36468 case BUILT_IN_FLOOR:
36469 /* The round insn does not trap on denormals. */
36470 if (flag_trapping_math || !TARGET_ROUND)
36471 break;
36473 if (out_mode == DFmode && in_mode == DFmode)
36475 if (out_n == 2 && in_n == 2)
36476 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36477 else if (out_n == 4 && in_n == 4)
36478 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36480 break;
36482 case BUILT_IN_FLOORF:
36483 /* The round insn does not trap on denormals. */
36484 if (flag_trapping_math || !TARGET_ROUND)
36485 break;
36487 if (out_mode == SFmode && in_mode == SFmode)
36489 if (out_n == 4 && in_n == 4)
36490 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36491 else if (out_n == 8 && in_n == 8)
36492 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36494 break;
36496 case BUILT_IN_CEIL:
36497 /* The round insn does not trap on denormals. */
36498 if (flag_trapping_math || !TARGET_ROUND)
36499 break;
36501 if (out_mode == DFmode && in_mode == DFmode)
36503 if (out_n == 2 && in_n == 2)
36504 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36505 else if (out_n == 4 && in_n == 4)
36506 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36508 break;
36510 case BUILT_IN_CEILF:
36511 /* The round insn does not trap on denormals. */
36512 if (flag_trapping_math || !TARGET_ROUND)
36513 break;
36515 if (out_mode == SFmode && in_mode == SFmode)
36517 if (out_n == 4 && in_n == 4)
36518 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36519 else if (out_n == 8 && in_n == 8)
36520 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36522 break;
36524 case BUILT_IN_TRUNC:
36525 /* The round insn does not trap on denormals. */
36526 if (flag_trapping_math || !TARGET_ROUND)
36527 break;
36529 if (out_mode == DFmode && in_mode == DFmode)
36531 if (out_n == 2 && in_n == 2)
36532 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36533 else if (out_n == 4 && in_n == 4)
36534 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36536 break;
36538 case BUILT_IN_TRUNCF:
36539 /* The round insn does not trap on denormals. */
36540 if (flag_trapping_math || !TARGET_ROUND)
36541 break;
36543 if (out_mode == SFmode && in_mode == SFmode)
36545 if (out_n == 4 && in_n == 4)
36546 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36547 else if (out_n == 8 && in_n == 8)
36548 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36550 break;
36552 case BUILT_IN_RINT:
36553 /* The round insn does not trap on denormals. */
36554 if (flag_trapping_math || !TARGET_ROUND)
36555 break;
36557 if (out_mode == DFmode && in_mode == DFmode)
36559 if (out_n == 2 && in_n == 2)
36560 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36561 else if (out_n == 4 && in_n == 4)
36562 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36564 break;
36566 case BUILT_IN_RINTF:
36567 /* The round insn does not trap on denormals. */
36568 if (flag_trapping_math || !TARGET_ROUND)
36569 break;
36571 if (out_mode == SFmode && in_mode == SFmode)
36573 if (out_n == 4 && in_n == 4)
36574 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36575 else if (out_n == 8 && in_n == 8)
36576 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36578 break;
36580 case BUILT_IN_ROUND:
36581 /* The round insn does not trap on denormals. */
36582 if (flag_trapping_math || !TARGET_ROUND)
36583 break;
36585 if (out_mode == DFmode && in_mode == DFmode)
36587 if (out_n == 2 && in_n == 2)
36588 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36589 else if (out_n == 4 && in_n == 4)
36590 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36592 break;
36594 case BUILT_IN_ROUNDF:
36595 /* The round insn does not trap on denormals. */
36596 if (flag_trapping_math || !TARGET_ROUND)
36597 break;
36599 if (out_mode == SFmode && in_mode == SFmode)
36601 if (out_n == 4 && in_n == 4)
36602 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36603 else if (out_n == 8 && in_n == 8)
36604 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36606 break;
36608 case BUILT_IN_FMA:
36609 if (out_mode == DFmode && in_mode == DFmode)
36611 if (out_n == 2 && in_n == 2)
36612 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36613 if (out_n == 4 && in_n == 4)
36614 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36616 break;
36618 case BUILT_IN_FMAF:
36619 if (out_mode == SFmode && in_mode == SFmode)
36621 if (out_n == 4 && in_n == 4)
36622 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36623 if (out_n == 8 && in_n == 8)
36624 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36626 break;
36628 default:
36629 break;
36632 /* Dispatch to a handler for a vectorization library. */
36633 if (ix86_veclib_handler)
36634 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36635 type_in);
36637 return NULL_TREE;
36640 /* Handler for an SVML-style interface to
36641 a library with vectorized intrinsics. */
36643 static tree
36644 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36646 char name[20];
36647 tree fntype, new_fndecl, args;
36648 unsigned arity;
36649 const char *bname;
36650 enum machine_mode el_mode, in_mode;
36651 int n, in_n;
36653 /* The SVML is suitable for unsafe math only. */
36654 if (!flag_unsafe_math_optimizations)
36655 return NULL_TREE;
36657 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36658 n = TYPE_VECTOR_SUBPARTS (type_out);
36659 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36660 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36661 if (el_mode != in_mode
36662 || n != in_n)
36663 return NULL_TREE;
36665 switch (fn)
36667 case BUILT_IN_EXP:
36668 case BUILT_IN_LOG:
36669 case BUILT_IN_LOG10:
36670 case BUILT_IN_POW:
36671 case BUILT_IN_TANH:
36672 case BUILT_IN_TAN:
36673 case BUILT_IN_ATAN:
36674 case BUILT_IN_ATAN2:
36675 case BUILT_IN_ATANH:
36676 case BUILT_IN_CBRT:
36677 case BUILT_IN_SINH:
36678 case BUILT_IN_SIN:
36679 case BUILT_IN_ASINH:
36680 case BUILT_IN_ASIN:
36681 case BUILT_IN_COSH:
36682 case BUILT_IN_COS:
36683 case BUILT_IN_ACOSH:
36684 case BUILT_IN_ACOS:
36685 if (el_mode != DFmode || n != 2)
36686 return NULL_TREE;
36687 break;
36689 case BUILT_IN_EXPF:
36690 case BUILT_IN_LOGF:
36691 case BUILT_IN_LOG10F:
36692 case BUILT_IN_POWF:
36693 case BUILT_IN_TANHF:
36694 case BUILT_IN_TANF:
36695 case BUILT_IN_ATANF:
36696 case BUILT_IN_ATAN2F:
36697 case BUILT_IN_ATANHF:
36698 case BUILT_IN_CBRTF:
36699 case BUILT_IN_SINHF:
36700 case BUILT_IN_SINF:
36701 case BUILT_IN_ASINHF:
36702 case BUILT_IN_ASINF:
36703 case BUILT_IN_COSHF:
36704 case BUILT_IN_COSF:
36705 case BUILT_IN_ACOSHF:
36706 case BUILT_IN_ACOSF:
36707 if (el_mode != SFmode || n != 4)
36708 return NULL_TREE;
36709 break;
36711 default:
36712 return NULL_TREE;
36715 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36717 if (fn == BUILT_IN_LOGF)
36718 strcpy (name, "vmlsLn4");
36719 else if (fn == BUILT_IN_LOG)
36720 strcpy (name, "vmldLn2");
36721 else if (n == 4)
36723 sprintf (name, "vmls%s", bname+10);
36724 name[strlen (name)-1] = '4';
36726 else
36727 sprintf (name, "vmld%s2", bname+10);
36729 /* Convert to uppercase. */
36730 name[4] &= ~0x20;
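/* For example, BUILT_IN_SINF ("__builtin_sinf", 4 floats wide) ends up
   as "vmlsSin4" and BUILT_IN_SIN ("__builtin_sin", 2 doubles wide) as
   "vmldSin2", the mangling this handler assumes the SVML library uses.  */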
36732 arity = 0;
36733 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36734 args;
36735 args = TREE_CHAIN (args))
36736 arity++;
36738 if (arity == 1)
36739 fntype = build_function_type_list (type_out, type_in, NULL);
36740 else
36741 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36743 /* Build a function declaration for the vectorized function. */
36744 new_fndecl = build_decl (BUILTINS_LOCATION,
36745 FUNCTION_DECL, get_identifier (name), fntype);
36746 TREE_PUBLIC (new_fndecl) = 1;
36747 DECL_EXTERNAL (new_fndecl) = 1;
36748 DECL_IS_NOVOPS (new_fndecl) = 1;
36749 TREE_READONLY (new_fndecl) = 1;
36751 return new_fndecl;
36754 /* Handler for an ACML-style interface to
36755 a library with vectorized intrinsics. */
36757 static tree
36758 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36760 char name[20] = "__vr.._";
36761 tree fntype, new_fndecl, args;
36762 unsigned arity;
36763 const char *bname;
36764 enum machine_mode el_mode, in_mode;
36765 int n, in_n;
36767 /* The ACML is 64-bit only and suitable for unsafe math only, as
36768 it does not correctly support parts of IEEE with the required
36769 precision such as denormals. */
36770 if (!TARGET_64BIT
36771 || !flag_unsafe_math_optimizations)
36772 return NULL_TREE;
36774 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36775 n = TYPE_VECTOR_SUBPARTS (type_out);
36776 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36777 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36778 if (el_mode != in_mode
36779 || n != in_n)
36780 return NULL_TREE;
36782 switch (fn)
36784 case BUILT_IN_SIN:
36785 case BUILT_IN_COS:
36786 case BUILT_IN_EXP:
36787 case BUILT_IN_LOG:
36788 case BUILT_IN_LOG2:
36789 case BUILT_IN_LOG10:
36790 name[4] = 'd';
36791 name[5] = '2';
36792 if (el_mode != DFmode
36793 || n != 2)
36794 return NULL_TREE;
36795 break;
36797 case BUILT_IN_SINF:
36798 case BUILT_IN_COSF:
36799 case BUILT_IN_EXPF:
36800 case BUILT_IN_POWF:
36801 case BUILT_IN_LOGF:
36802 case BUILT_IN_LOG2F:
36803 case BUILT_IN_LOG10F:
36804 name[4] = 's';
36805 name[5] = '4';
36806 if (el_mode != SFmode
36807 || n != 4)
36808 return NULL_TREE;
36809 break;
36811 default:
36812 return NULL_TREE;
36815 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36816 sprintf (name + 7, "%s", bname+10);
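/* For example, BUILT_IN_SIN becomes "__vrd2_sin" (2 doubles) and
   BUILT_IN_SINF becomes "__vrs4_sinf" (4 floats), the naming scheme this
   handler assumes ACML's vector routines use.  */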
36818 arity = 0;
36819 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36820 args;
36821 args = TREE_CHAIN (args))
36822 arity++;
36824 if (arity == 1)
36825 fntype = build_function_type_list (type_out, type_in, NULL);
36826 else
36827 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36829 /* Build a function declaration for the vectorized function. */
36830 new_fndecl = build_decl (BUILTINS_LOCATION,
36831 FUNCTION_DECL, get_identifier (name), fntype);
36832 TREE_PUBLIC (new_fndecl) = 1;
36833 DECL_EXTERNAL (new_fndecl) = 1;
36834 DECL_IS_NOVOPS (new_fndecl) = 1;
36835 TREE_READONLY (new_fndecl) = 1;
36837 return new_fndecl;
36840 /* Returns a decl of a function that implements gather load with
36841 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36842 Return NULL_TREE if it is not available. */
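/* For example, gathering V2DF elements through a V2DI (DImode) index
   vector with scale 8 yields IX86_BUILTIN_GATHERDIV2DF, i.e. the
   vgatherqpd pattern, provided AVX2 is enabled and the 64-bit index
   fits the pointer size.  */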
36844 static tree
36845 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36846 const_tree index_type, int scale)
36848 bool si;
36849 enum ix86_builtins code;
36851 if (! TARGET_AVX2)
36852 return NULL_TREE;
36854 if ((TREE_CODE (index_type) != INTEGER_TYPE
36855 && !POINTER_TYPE_P (index_type))
36856 || (TYPE_MODE (index_type) != SImode
36857 && TYPE_MODE (index_type) != DImode))
36858 return NULL_TREE;
36860 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36861 return NULL_TREE;
36863 /* v*gather* insn sign extends index to pointer mode. */
36864 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36865 && TYPE_UNSIGNED (index_type))
36866 return NULL_TREE;
36868 if (scale <= 0
36869 || scale > 8
36870 || (scale & (scale - 1)) != 0)
36871 return NULL_TREE;
36873 si = TYPE_MODE (index_type) == SImode;
36874 switch (TYPE_MODE (mem_vectype))
36876 case V2DFmode:
36877 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36878 break;
36879 case V4DFmode:
36880 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36881 break;
36882 case V2DImode:
36883 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36884 break;
36885 case V4DImode:
36886 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36887 break;
36888 case V4SFmode:
36889 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36890 break;
36891 case V8SFmode:
36892 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36893 break;
36894 case V4SImode:
36895 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36896 break;
36897 case V8SImode:
36898 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36899 break;
36900 case V8DFmode:
36901 if (TARGET_AVX512F)
36902 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36903 else
36904 return NULL_TREE;
36905 break;
36906 case V8DImode:
36907 if (TARGET_AVX512F)
36908 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36909 else
36910 return NULL_TREE;
36911 break;
36912 case V16SFmode:
36913 if (TARGET_AVX512F)
36914 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36915 else
36916 return NULL_TREE;
36917 break;
36918 case V16SImode:
36919 if (TARGET_AVX512F)
36920 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36921 else
36922 return NULL_TREE;
36923 break;
36924 default:
36925 return NULL_TREE;
36928 return ix86_get_builtin (code);
36931 /* Return the decl of a target-specific builtin that implements the
36932 reciprocal of the function FN, or NULL_TREE if it is not available. */
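/* Roughly speaking, under -ffast-math a scalar 1.0f / __builtin_sqrtf (x)
   can be rewritten via IX86_BUILTIN_RSQRTF (rsqrtss plus a Newton-Raphson
   step), and the vectorized SQRTPS_NR forms map to their RSQRTPS
   counterparts.  */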
36934 static tree
36935 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36936 bool sqrt ATTRIBUTE_UNUSED)
36938 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36939 && flag_finite_math_only && !flag_trapping_math
36940 && flag_unsafe_math_optimizations))
36941 return NULL_TREE;
36943 if (md_fn)
36944 /* Machine dependent builtins. */
36945 switch (fn)
36947 /* Vectorized version of sqrt to rsqrt conversion. */
36948 case IX86_BUILTIN_SQRTPS_NR:
36949 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36951 case IX86_BUILTIN_SQRTPS_NR256:
36952 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36954 default:
36955 return NULL_TREE;
36957 else
36958 /* Normal builtins. */
36959 switch (fn)
36961 /* Sqrt to rsqrt conversion. */
36962 case BUILT_IN_SQRTF:
36963 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36965 default:
36966 return NULL_TREE;
36970 /* Helper for avx_vpermilps256_operand et al. This is also used by
36971 the expansion functions to turn the parallel back into a mask.
36972 The return value is 0 for no match and the imm8+1 for a match. */
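/* For example, for V4SFmode a parallel selecting elements (1 0 3 2)
   encodes two bits per element: 1 | (0 << 2) | (3 << 4) | (2 << 6)
   == 0xb1, so the return value is 0xb2.  */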
36975 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36977 unsigned i, nelt = GET_MODE_NUNITS (mode);
36978 unsigned mask = 0;
36979 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36981 if (XVECLEN (par, 0) != (int) nelt)
36982 return 0;
36984 /* Validate that all of the elements are constants, and not totally
36985 out of range. Copy the data into an integral array to make the
36986 subsequent checks easier. */
36987 for (i = 0; i < nelt; ++i)
36989 rtx er = XVECEXP (par, 0, i);
36990 unsigned HOST_WIDE_INT ei;
36992 if (!CONST_INT_P (er))
36993 return 0;
36994 ei = INTVAL (er);
36995 if (ei >= nelt)
36996 return 0;
36997 ipar[i] = ei;
37000 switch (mode)
37002 case V8DFmode:
37003 /* In the 512-bit DFmode case, we can only move elements within
37004 a 128-bit lane. First fill the second part of the mask,
37005 then fallthru. */
37006 for (i = 4; i < 6; ++i)
37008 if (ipar[i] < 4 || ipar[i] >= 6)
37009 return 0;
37010 mask |= (ipar[i] - 4) << i;
37012 for (i = 6; i < 8; ++i)
37014 if (ipar[i] < 6)
37015 return 0;
37016 mask |= (ipar[i] - 6) << i;
37018 /* FALLTHRU */
37020 case V4DFmode:
37021 /* In the 256-bit DFmode case, we can only move elements within
37022 a 128-bit lane. */
37023 for (i = 0; i < 2; ++i)
37025 if (ipar[i] >= 2)
37026 return 0;
37027 mask |= ipar[i] << i;
37029 for (i = 2; i < 4; ++i)
37031 if (ipar[i] < 2)
37032 return 0;
37033 mask |= (ipar[i] - 2) << i;
37035 break;
37037 case V16SFmode:
37038 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37039 must mirror the permutation in the lower 256 bits. */
37040 for (i = 0; i < 8; ++i)
37041 if (ipar[i] + 8 != ipar[i + 8])
37042 return 0;
37043 /* FALLTHRU */
37045 case V8SFmode:
37046 /* In the 256-bit SFmode case, we have full freedom of
37047 movement within the low 128-bit lane, but the high 128-bit
37048 lane must mirror the exact same pattern. */
37049 for (i = 0; i < 4; ++i)
37050 if (ipar[i] + 4 != ipar[i + 4])
37051 return 0;
37052 nelt = 4;
37053 /* FALLTHRU */
37055 case V2DFmode:
37056 case V4SFmode:
37057 /* In the 128-bit case, we've full freedom in the placement of
37058 the elements from the source operand. */
37059 for (i = 0; i < nelt; ++i)
37060 mask |= ipar[i] << (i * (nelt / 2));
37061 break;
37063 default:
37064 gcc_unreachable ();
37067 /* Make sure success has a non-zero value by adding one. */
37068 return mask + 1;
37071 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37072 the expansion functions to turn the parallel back into a mask.
37073 The return value is 0 for no match and the imm8+1 for a match. */
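/* For example, for V8SFmode a parallel selecting elements
   (4 5 6 7 0 1 2 3) picks lane 1 for the low half and lane 0 for the
   high half, giving imm8 0x01 (a lane swap), so the return value
   is 0x02.  */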
37076 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37078 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37079 unsigned mask = 0;
37080 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37082 if (XVECLEN (par, 0) != (int) nelt)
37083 return 0;
37085 /* Validate that all of the elements are constants, and not totally
37086 out of range. Copy the data into an integral array to make the
37087 subsequent checks easier. */
37088 for (i = 0; i < nelt; ++i)
37090 rtx er = XVECEXP (par, 0, i);
37091 unsigned HOST_WIDE_INT ei;
37093 if (!CONST_INT_P (er))
37094 return 0;
37095 ei = INTVAL (er);
37096 if (ei >= 2 * nelt)
37097 return 0;
37098 ipar[i] = ei;
37101 /* Validate that each half of the permute selects consecutive elements. */
37102 for (i = 0; i < nelt2 - 1; ++i)
37103 if (ipar[i] + 1 != ipar[i + 1])
37104 return 0;
37105 for (i = nelt2; i < nelt - 1; ++i)
37106 if (ipar[i] + 1 != ipar[i + 1])
37107 return 0;
37109 /* Reconstruct the mask. */
37110 for (i = 0; i < 2; ++i)
37112 unsigned e = ipar[i * nelt2];
37113 if (e % nelt2)
37114 return 0;
37115 e /= nelt2;
37116 mask |= e << (i * 4);
37119 /* Make sure success has a non-zero value by adding one. */
37120 return mask + 1;
37123 /* Return a register priority for hard reg REGNO. */
37124 static int
37125 ix86_register_priority (int hard_regno)
37127 /* ebp and r13 as the base always want a displacement, r12 as the
37128 base always wants an index. So discourage their usage in an
37129 address. */
37130 if (hard_regno == R12_REG || hard_regno == R13_REG)
37131 return 0;
37132 if (hard_regno == BP_REG)
37133 return 1;
37134 /* New x86-64 int registers result in bigger code size. Discourage
37135 them. */
37136 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37137 return 2;
37138 /* New x86-64 SSE registers result in bigger code size. Discourage
37139 them. */
37140 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37141 return 2;
37142 /* Usage of AX register results in smaller code. Prefer it. */
37143 if (hard_regno == 0)
37144 return 4;
37145 return 3;
37148 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37150 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37151 QImode must go into class Q_REGS.
37152 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37153 movdf to do mem-to-mem moves through integer regs. */
37155 static reg_class_t
37156 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37158 enum machine_mode mode = GET_MODE (x);
37160 /* We're only allowed to return a subclass of CLASS. Many of the
37161 following checks fail for NO_REGS, so eliminate that early. */
37162 if (regclass == NO_REGS)
37163 return NO_REGS;
37165 /* All classes can load zeros. */
37166 if (x == CONST0_RTX (mode))
37167 return regclass;
37169 /* Force constants into memory if we are loading a (nonzero) constant into
37170 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37171 instructions to load from a constant. */
37172 if (CONSTANT_P (x)
37173 && (MAYBE_MMX_CLASS_P (regclass)
37174 || MAYBE_SSE_CLASS_P (regclass)
37175 || MAYBE_MASK_CLASS_P (regclass)))
37176 return NO_REGS;
37178 /* Prefer SSE regs only, if we can use them for math. */
37179 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37180 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37182 /* Floating-point constants need more complex checks. */
37183 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37185 /* General regs can load everything. */
37186 if (reg_class_subset_p (regclass, GENERAL_REGS))
37187 return regclass;
37189 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37190 zero above. We only want to wind up preferring 80387 registers if
37191 we plan on doing computation with them. */
37192 if (TARGET_80387
37193 && standard_80387_constant_p (x) > 0)
37195 /* Limit class to non-sse. */
37196 if (regclass == FLOAT_SSE_REGS)
37197 return FLOAT_REGS;
37198 if (regclass == FP_TOP_SSE_REGS)
37199 return FP_TOP_REG;
37200 if (regclass == FP_SECOND_SSE_REGS)
37201 return FP_SECOND_REG;
37202 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37203 return regclass;
37206 return NO_REGS;
37209 /* Generally when we see PLUS here, it's the function invariant
37210 (plus soft-fp const_int). Which can only be computed into general
37211 regs. */
37212 if (GET_CODE (x) == PLUS)
37213 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37215 /* QImode constants are easy to load, but non-constant QImode data
37216 must go into Q_REGS. */
37217 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37219 if (reg_class_subset_p (regclass, Q_REGS))
37220 return regclass;
37221 if (reg_class_subset_p (Q_REGS, regclass))
37222 return Q_REGS;
37223 return NO_REGS;
37226 return regclass;
37229 /* Discourage putting floating-point values in SSE registers unless
37230 SSE math is being used, and likewise for the 387 registers. */
37231 static reg_class_t
37232 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37234 enum machine_mode mode = GET_MODE (x);
37236 /* Restrict the output reload class to the register bank that we are doing
37237 math on. If we would like not to return a subset of CLASS, reject this
37238 alternative: if reload cannot do this, it will still use its choice. */
37239 mode = GET_MODE (x);
37240 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37241 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37243 if (X87_FLOAT_MODE_P (mode))
37245 if (regclass == FP_TOP_SSE_REGS)
37246 return FP_TOP_REG;
37247 else if (regclass == FP_SECOND_SSE_REGS)
37248 return FP_SECOND_REG;
37249 else
37250 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37253 return regclass;
37256 static reg_class_t
37257 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37258 enum machine_mode mode, secondary_reload_info *sri)
37260 /* Double-word spills from general registers to non-offsettable memory
37261 references (zero-extended addresses) require special handling. */
37262 if (TARGET_64BIT
37263 && MEM_P (x)
37264 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37265 && INTEGER_CLASS_P (rclass)
37266 && !offsettable_memref_p (x))
37268 sri->icode = (in_p
37269 ? CODE_FOR_reload_noff_load
37270 : CODE_FOR_reload_noff_store);
37271 /* Add the cost of moving address to a temporary. */
37272 sri->extra_cost = 1;
37274 return NO_REGS;
37277 /* QImode spills from non-QI registers require an
37278 intermediate register on 32-bit targets. */
37279 if (mode == QImode
37280 && (MAYBE_MASK_CLASS_P (rclass)
37281 || (!TARGET_64BIT && !in_p
37282 && INTEGER_CLASS_P (rclass)
37283 && MAYBE_NON_Q_CLASS_P (rclass))))
37285 int regno;
37287 if (REG_P (x))
37288 regno = REGNO (x);
37289 else
37290 regno = -1;
37292 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37293 regno = true_regnum (x);
37295 /* Return Q_REGS if the operand is in memory. */
37296 if (regno == -1)
37297 return Q_REGS;
37300 /* This condition handles the corner case where an expression involving
37301 pointers gets vectorized. We're trying to use the address of a
37302 stack slot as a vector initializer.
37304 (set (reg:V2DI 74 [ vect_cst_.2 ])
37305 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37307 Eventually frame gets turned into sp+offset like this:
37309 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37310 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37311 (const_int 392 [0x188]))))
37313 That later gets turned into:
37315 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37316 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37317 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37319 We'll have the following reload recorded:
37321 Reload 0: reload_in (DI) =
37322 (plus:DI (reg/f:DI 7 sp)
37323 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37324 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37325 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37326 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37327 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37328 reload_reg_rtx: (reg:V2DI 22 xmm1)
37330 Which isn't going to work since SSE instructions can't handle scalar
37331 additions. Returning GENERAL_REGS forces the addition into integer
37332 register and reload can handle subsequent reloads without problems. */
37334 if (in_p && GET_CODE (x) == PLUS
37335 && SSE_CLASS_P (rclass)
37336 && SCALAR_INT_MODE_P (mode))
37337 return GENERAL_REGS;
37339 return NO_REGS;
37342 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37344 static bool
37345 ix86_class_likely_spilled_p (reg_class_t rclass)
37347 switch (rclass)
37349 case AREG:
37350 case DREG:
37351 case CREG:
37352 case BREG:
37353 case AD_REGS:
37354 case SIREG:
37355 case DIREG:
37356 case SSE_FIRST_REG:
37357 case FP_TOP_REG:
37358 case FP_SECOND_REG:
37359 return true;
37361 default:
37362 break;
37365 return false;
37368 /* If we are copying between general and FP registers, we need a memory
37369 location. The same is true for SSE and MMX registers.
37371 To optimize register_move_cost performance, allow inline variant.
37373 The macro can't work reliably when one of the CLASSES is a class containing
37374 registers from multiple units (SSE, MMX, integer). We avoid this by never
37375 combining those units in single alternative in the machine description.
37376 Ensure that this constraint holds to avoid unexpected surprises.
37378 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37379 enforce these sanity checks. */
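/* For example, an SFmode or DFmode move between FLOAT_REGS (x87) and
   GENERAL_REGS always goes through a stack slot, as does an SSE <->
   integer move when the relevant TARGET_INTER_UNIT_MOVES_* tuning is
   disabled.  */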
37381 static inline bool
37382 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37383 enum machine_mode mode, int strict)
37385 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37386 return false;
37387 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37388 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37389 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37390 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37391 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37392 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37394 gcc_assert (!strict || lra_in_progress);
37395 return true;
37398 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37399 return true;
37401 /* ??? This is a lie. We do have moves between mmx/general, and for
37402 mmx/sse2. But by saying we need secondary memory we discourage the
37403 register allocator from using the mmx registers unless needed. */
37404 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37405 return true;
37407 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37409 /* SSE1 doesn't have any direct moves from other classes. */
37410 if (!TARGET_SSE2)
37411 return true;
37413 /* If the target says that inter-unit moves are more expensive
37414 than moving through memory, then don't generate them. */
37415 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37416 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37417 return true;
37419 /* Between SSE and general, we have moves no larger than word size. */
37420 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37421 return true;
37424 return false;
37427 bool
37428 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37429 enum machine_mode mode, int strict)
37431 return inline_secondary_memory_needed (class1, class2, mode, strict);
37434 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37436 On the 80386, this is the size of MODE in words,
37437 except in the FP regs, where a single reg is always enough. */
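/* For example, DImode needs two GENERAL_REGS on a 32-bit target but only
   one SSE or FP register, while XFmode needs three GENERAL_REGS (two in
   64-bit mode) yet still just one x87 register.  */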
37439 static unsigned char
37440 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37442 if (MAYBE_INTEGER_CLASS_P (rclass))
37444 if (mode == XFmode)
37445 return (TARGET_64BIT ? 2 : 3);
37446 else if (mode == XCmode)
37447 return (TARGET_64BIT ? 4 : 6);
37448 else
37449 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37451 else
37453 if (COMPLEX_MODE_P (mode))
37454 return 2;
37455 else
37456 return 1;
37460 /* Return true if the registers in CLASS cannot represent the change from
37461 modes FROM to TO. */
37463 bool
37464 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37465 enum reg_class regclass)
37467 if (from == to)
37468 return false;
37470 /* x87 registers can't do subreg at all, as all values are reformatted
37471 to extended precision. */
37472 if (MAYBE_FLOAT_CLASS_P (regclass))
37473 return true;
37475 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37477 /* Vector registers do not support QI or HImode loads. If we don't
37478 disallow a change to these modes, reload will assume it's ok to
37479 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37480 the vec_dupv4hi pattern. */
37481 if (GET_MODE_SIZE (from) < 4)
37482 return true;
37484 /* Vector registers do not support subreg with nonzero offsets, which
37485 are otherwise valid for integer registers. Since we can't see
37486 whether we have a nonzero offset from here, prohibit all
37487 nonparadoxical subregs changing size. */
37488 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37489 return true;
37492 return false;
37495 /* Return the cost of moving data of mode M between a
37496 register and memory. A value of 2 is the default; this cost is
37497 relative to those in `REGISTER_MOVE_COST'.
37499 This function is used extensively by register_move_cost that is used to
37500 build tables at startup. Make it inline in this case.
37501 When IN is 2, return maximum of in and out move cost.
37503 If moving between registers and memory is more expensive than
37504 between two registers, you should define this macro to express the
37505 relative cost.
37507 Also model the increased cost of moving QImode registers in
37508 non-Q_REGS classes.  */
37510 static inline int
37511 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37512 int in)
37514 int cost;
37515 if (FLOAT_CLASS_P (regclass))
37517 int index;
37518 switch (mode)
37520 case SFmode:
37521 index = 0;
37522 break;
37523 case DFmode:
37524 index = 1;
37525 break;
37526 case XFmode:
37527 index = 2;
37528 break;
37529 default:
37530 return 100;
37532 if (in == 2)
37533 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37534 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37536 if (SSE_CLASS_P (regclass))
37538 int index;
37539 switch (GET_MODE_SIZE (mode))
37541 case 4:
37542 index = 0;
37543 break;
37544 case 8:
37545 index = 1;
37546 break;
37547 case 16:
37548 index = 2;
37549 break;
37550 default:
37551 return 100;
37553 if (in == 2)
37554 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37555 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37557 if (MMX_CLASS_P (regclass))
37559 int index;
37560 switch (GET_MODE_SIZE (mode))
37562 case 4:
37563 index = 0;
37564 break;
37565 case 8:
37566 index = 1;
37567 break;
37568 default:
37569 return 100;
37571 if (in == 2)
37572 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37573 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37575 switch (GET_MODE_SIZE (mode))
37577 case 1:
37578 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37580 if (!in)
37581 return ix86_cost->int_store[0];
37582 if (TARGET_PARTIAL_REG_DEPENDENCY
37583 && optimize_function_for_speed_p (cfun))
37584 cost = ix86_cost->movzbl_load;
37585 else
37586 cost = ix86_cost->int_load[0];
37587 if (in == 2)
37588 return MAX (cost, ix86_cost->int_store[0]);
37589 return cost;
37591 else
37593 if (in == 2)
37594 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37595 if (in)
37596 return ix86_cost->movzbl_load;
37597 else
37598 return ix86_cost->int_store[0] + 4;
37600 break;
37601 case 2:
37602 if (in == 2)
37603 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37604 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37605 default:
37606 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37607 if (mode == TFmode)
37608 mode = XFmode;
37609 if (in == 2)
37610 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37611 else if (in)
37612 cost = ix86_cost->int_load[2];
37613 else
37614 cost = ix86_cost->int_store[2];
37615 return (cost * (((int) GET_MODE_SIZE (mode)
37616 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37620 static int
37621 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37622 bool in)
37624 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37628 /* Return the cost of moving data from a register in class CLASS1 to
37629 one in class CLASS2.
37631 It is not required that the cost always equal 2 when FROM is the same as TO;
37632 on some machines it is expensive to move between registers if they are not
37633 general registers. */
37635 static int
37636 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37637 reg_class_t class2_i)
37639 enum reg_class class1 = (enum reg_class) class1_i;
37640 enum reg_class class2 = (enum reg_class) class2_i;
37642 /* In case we require secondary memory, compute cost of the store followed
37643 by load. In order to avoid bad register allocation choices, we need
37644 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37646 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37648 int cost = 1;
37650 cost += inline_memory_move_cost (mode, class1, 2);
37651 cost += inline_memory_move_cost (mode, class2, 2);
37653 /* In case of copying from general_purpose_register we may emit multiple
37654 stores followed by single load causing memory size mismatch stall.
37655 Count this as arbitrarily high cost of 20. */
37656 if (targetm.class_max_nregs (class1, mode)
37657 > targetm.class_max_nregs (class2, mode))
37658 cost += 20;
37660 /* In the case of FP/MMX moves, the registers actually overlap, and we
37661 have to switch modes in order to treat them differently. */
37662 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37663 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37664 cost += 20;
37666 return cost;
37669 /* Moves between SSE/MMX and integer unit are expensive. */
37670 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37671 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37673 /* ??? By keeping returned value relatively high, we limit the number
37674 of moves between integer and MMX/SSE registers for all targets.
37675 Additionally, high value prevents problem with x86_modes_tieable_p(),
37676 where integer modes in MMX/SSE registers are not tieable
37677 because of missing QImode and HImode moves to, from or between
37678 MMX/SSE registers. */
37679 return MAX (8, ix86_cost->mmxsse_to_integer);
37681 if (MAYBE_FLOAT_CLASS_P (class1))
37682 return ix86_cost->fp_move;
37683 if (MAYBE_SSE_CLASS_P (class1))
37684 return ix86_cost->sse_move;
37685 if (MAYBE_MMX_CLASS_P (class1))
37686 return ix86_cost->mmx_move;
37687 return 2;
37690 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37691 MODE. */
37693 bool
37694 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37696 /* Only the flags register can hold CCmode values, and it cannot hold values of any other mode. */
37697 if (CC_REGNO_P (regno))
37698 return GET_MODE_CLASS (mode) == MODE_CC;
37699 if (GET_MODE_CLASS (mode) == MODE_CC
37700 || GET_MODE_CLASS (mode) == MODE_RANDOM
37701 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37702 return false;
37703 if (STACK_REGNO_P (regno))
37704 return VALID_FP_MODE_P (mode);
37705 if (MASK_REGNO_P (regno))
37706 return VALID_MASK_REG_MODE (mode);
37707 if (SSE_REGNO_P (regno))
37709 /* We implement the move patterns for all vector modes into and
37710 out of SSE registers, even when no operation instructions
37711 are available. */
37713 /* For AVX-512 we allow, regardless of regno:
37714 - XI mode
37715 - any of 512-bit wide vector mode
37716 - any scalar mode. */
37717 if (TARGET_AVX512F
37718 && (mode == XImode
37719 || VALID_AVX512F_REG_MODE (mode)
37720 || VALID_AVX512F_SCALAR_MODE (mode)))
37721 return true;
37723 /* xmm16-xmm31 are only available for AVX-512. */
37724 if (EXT_REX_SSE_REGNO_P (regno))
37725 return false;
37727 /* OImode and AVX modes are available only when AVX is enabled. */
37728 return ((TARGET_AVX
37729 && VALID_AVX256_REG_OR_OI_MODE (mode))
37730 || VALID_SSE_REG_MODE (mode)
37731 || VALID_SSE2_REG_MODE (mode)
37732 || VALID_MMX_REG_MODE (mode)
37733 || VALID_MMX_REG_MODE_3DNOW (mode));
37735 if (MMX_REGNO_P (regno))
37737 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37738 so if the register is available at all, then we can move data of
37739 the given mode into or out of it. */
37740 return (VALID_MMX_REG_MODE (mode)
37741 || VALID_MMX_REG_MODE_3DNOW (mode));
37744 if (mode == QImode)
37746 /* Take care for QImode values - they can be in non-QI regs,
37747 but then they do cause partial register stalls. */
37748 if (ANY_QI_REGNO_P (regno))
37749 return true;
37750 if (!TARGET_PARTIAL_REG_STALL)
37751 return true;
37752 /* LRA checks if the hard register is OK for the given mode.
37753 QImode values can live in non-QI regs, so we allow all
37754 registers here. */
37755 if (lra_in_progress)
37756 return true;
37757 return !can_create_pseudo_p ();
37759 /* We handle both integer and floats in the general purpose registers. */
37760 else if (VALID_INT_MODE_P (mode))
37761 return true;
37762 else if (VALID_FP_MODE_P (mode))
37763 return true;
37764 else if (VALID_DFP_MODE_P (mode))
37765 return true;
37766 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37767 on to use that value in smaller contexts, this can easily force a
37768 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37769 supporting DImode, allow it. */
37770 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37771 return true;
37773 return false;
37776 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37777 tieable integer mode. */
37779 static bool
37780 ix86_tieable_integer_mode_p (enum machine_mode mode)
37782 switch (mode)
37784 case HImode:
37785 case SImode:
37786 return true;
37788 case QImode:
37789 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37791 case DImode:
37792 return TARGET_64BIT;
37794 default:
37795 return false;
37799 /* Return true if MODE1 is accessible in a register that can hold MODE2
37800 without copying. That is, all register classes that can hold MODE2
37801 can also hold MODE1. */
37803 bool
37804 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37806 if (mode1 == mode2)
37807 return true;
37809 if (ix86_tieable_integer_mode_p (mode1)
37810 && ix86_tieable_integer_mode_p (mode2))
37811 return true;
37813 /* MODE2 being XFmode implies fp stack or general regs, which means we
37814 can tie any smaller floating point modes to it. Note that we do not
37815 tie this with TFmode. */
37816 if (mode2 == XFmode)
37817 return mode1 == SFmode || mode1 == DFmode;
37819 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37820 that we can tie it with SFmode. */
37821 if (mode2 == DFmode)
37822 return mode1 == SFmode;
37824 /* If MODE2 is only appropriate for an SSE register, then tie with
37825 any other mode acceptable to SSE registers. */
37826 if (GET_MODE_SIZE (mode2) == 32
37827 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37828 return (GET_MODE_SIZE (mode1) == 32
37829 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37830 if (GET_MODE_SIZE (mode2) == 16
37831 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37832 return (GET_MODE_SIZE (mode1) == 16
37833 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37835 /* If MODE2 is appropriate for an MMX register, then tie
37836 with any other mode acceptable to MMX registers. */
37837 if (GET_MODE_SIZE (mode2) == 8
37838 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37839 return (GET_MODE_SIZE (mode1) == 8
37840 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37842 return false;
37845 /* Return the cost of moving between two registers of mode MODE. */
37847 static int
37848 ix86_set_reg_reg_cost (enum machine_mode mode)
37850 unsigned int units = UNITS_PER_WORD;
37852 switch (GET_MODE_CLASS (mode))
37854 default:
37855 break;
37857 case MODE_CC:
37858 units = GET_MODE_SIZE (CCmode);
37859 break;
37861 case MODE_FLOAT:
37862 if ((TARGET_SSE && mode == TFmode)
37863 || (TARGET_80387 && mode == XFmode)
37864 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37865 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37866 units = GET_MODE_SIZE (mode);
37867 break;
37869 case MODE_COMPLEX_FLOAT:
37870 if ((TARGET_SSE && mode == TCmode)
37871 || (TARGET_80387 && mode == XCmode)
37872 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37873 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37874 units = GET_MODE_SIZE (mode);
37875 break;
37877 case MODE_VECTOR_INT:
37878 case MODE_VECTOR_FLOAT:
37879 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37880 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37881 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37882 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37883 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37884 units = GET_MODE_SIZE (mode);
37887 /* Return the cost of moving between two registers of mode MODE,
37888 assuming that the move will be in pieces of at most UNITS bytes. */
37889 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
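/* For example, a DImode register copy on a 32-bit target costs
   COSTS_N_INSNS (2) because it is done in two word-sized pieces, while a
   V4SFmode copy with SSE enabled costs COSTS_N_INSNS (1) since the whole
   16 bytes move as one unit.  */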
37892 /* Compute a (partial) cost for rtx X. Return true if the complete
37893 cost has been computed, and false if subexpressions should be
37894 scanned. In either case, *TOTAL contains the cost result. */
37896 static bool
37897 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37898 bool speed)
37900 rtx mask;
37901 enum rtx_code code = (enum rtx_code) code_i;
37902 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37903 enum machine_mode mode = GET_MODE (x);
37904 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37906 switch (code)
37908 case SET:
37909 if (register_operand (SET_DEST (x), VOIDmode)
37910 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37912 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37913 return true;
37915 return false;
37917 case CONST_INT:
37918 case CONST:
37919 case LABEL_REF:
37920 case SYMBOL_REF:
37921 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37922 *total = 3;
37923 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37924 *total = 2;
37925 else if (flag_pic && SYMBOLIC_CONST (x)
37926 && !(TARGET_64BIT
37927 && (GET_CODE (x) == LABEL_REF
37928 || (GET_CODE (x) == SYMBOL_REF
37929 && SYMBOL_REF_LOCAL_P (x)))))
37930 *total = 1;
37931 else
37932 *total = 0;
37933 return true;
37935 case CONST_DOUBLE:
37936 if (mode == VOIDmode)
37938 *total = 0;
37939 return true;
37941 switch (standard_80387_constant_p (x))
37943 case 1: /* 0.0 */
37944 *total = 1;
37945 return true;
37946 default: /* Other constants */
37947 *total = 2;
37948 return true;
37949 case 0:
37950 case -1:
37951 break;
37953 if (SSE_FLOAT_MODE_P (mode))
37955 case CONST_VECTOR:
37956 switch (standard_sse_constant_p (x))
37958 case 0:
37959 break;
37960 case 1: /* 0: xor eliminates false dependency */
37961 *total = 0;
37962 return true;
37963 default: /* -1: cmp contains false dependency */
37964 *total = 1;
37965 return true;
37968 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37969 it'll probably end up. Add a penalty for size. */
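/* That is: the cost of one load, plus 1 if a 32-bit PIC access is
needed, plus a size penalty of 0, 1 or 2 for SFmode, DFmode or
larger constants respectively. */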
37970 *total = (COSTS_N_INSNS (1)
37971 + (flag_pic != 0 && !TARGET_64BIT)
37972 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37973 return true;
37975 case ZERO_EXTEND:
37976 /* The zero extension is often completely free on x86_64, so make
37977 it as cheap as possible. */
37978 if (TARGET_64BIT && mode == DImode
37979 && GET_MODE (XEXP (x, 0)) == SImode)
37980 *total = 1;
37981 else if (TARGET_ZERO_EXTEND_WITH_AND)
37982 *total = cost->add;
37983 else
37984 *total = cost->movzx;
37985 return false;
37987 case SIGN_EXTEND:
37988 *total = cost->movsx;
37989 return false;
37991 case ASHIFT:
37992 if (SCALAR_INT_MODE_P (mode)
37993 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37994 && CONST_INT_P (XEXP (x, 1)))
37996 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37997 if (value == 1)
37999 *total = cost->add;
38000 return false;
38002 if ((value == 2 || value == 3)
38003 && cost->lea <= cost->shift_const)
38005 *total = cost->lea;
38006 return false;
38009 /* FALLTHRU */
38011 case ROTATE:
38012 case ASHIFTRT:
38013 case LSHIFTRT:
38014 case ROTATERT:
38015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38017 /* ??? Should be SSE vector operation cost. */
38018 /* At least for published AMD latencies, this really is the same
38019 as the latency for a simple fpu operation like fabs. */
38020 /* V*QImode is emulated with 1-11 insns. */
38021 if (mode == V16QImode || mode == V32QImode)
38023 int count = 11;
38024 if (TARGET_XOP && mode == V16QImode)
38026 /* For XOP we use vpshab, which requires a broadcast of the
38027 value to the variable shift insn. For constants this
38028 means a V16QImode constant in memory; even when we can perform
38029 the shift with one insn, set the cost to prefer paddb. */
38030 if (CONSTANT_P (XEXP (x, 1)))
38032 *total = (cost->fabs
38033 + rtx_cost (XEXP (x, 0), code, 0, speed)
38034 + (speed ? 2 : COSTS_N_BYTES (16)));
38035 return true;
38037 count = 3;
38039 else if (TARGET_SSSE3)
38040 count = 7;
38041 *total = cost->fabs * count;
38043 else
38044 *total = cost->fabs;
38046 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38048 if (CONST_INT_P (XEXP (x, 1)))
38050 if (INTVAL (XEXP (x, 1)) > 32)
38051 *total = cost->shift_const + COSTS_N_INSNS (2);
38052 else
38053 *total = cost->shift_const * 2;
38055 else
38057 if (GET_CODE (XEXP (x, 1)) == AND)
38058 *total = cost->shift_var * 2;
38059 else
38060 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38063 else
38065 if (CONST_INT_P (XEXP (x, 1)))
38066 *total = cost->shift_const;
38067 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38068 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38070 /* Return the cost after shift-and truncation. */
38071 *total = cost->shift_var;
38072 return true;
38074 else
38075 *total = cost->shift_var;
38077 return false;
38079 case FMA:
38081 rtx sub;
38083 gcc_assert (FLOAT_MODE_P (mode));
38084 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38086 /* ??? SSE scalar/vector cost should be used here. */
38087 /* ??? Bald assumption that fma has the same cost as fmul. */
38088 *total = cost->fmul;
38089 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38091 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38092 sub = XEXP (x, 0);
38093 if (GET_CODE (sub) == NEG)
38094 sub = XEXP (sub, 0);
38095 *total += rtx_cost (sub, FMA, 0, speed);
38097 sub = XEXP (x, 2);
38098 if (GET_CODE (sub) == NEG)
38099 sub = XEXP (sub, 0);
38100 *total += rtx_cost (sub, FMA, 2, speed);
38101 return true;
38104 case MULT:
38105 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38107 /* ??? SSE scalar cost should be used here. */
38108 *total = cost->fmul;
38109 return false;
38111 else if (X87_FLOAT_MODE_P (mode))
38113 *total = cost->fmul;
38114 return false;
38116 else if (FLOAT_MODE_P (mode))
38118 /* ??? SSE vector cost should be used here. */
38119 *total = cost->fmul;
38120 return false;
38122 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38124 /* V*QImode is emulated with 7-13 insns. */
38125 if (mode == V16QImode || mode == V32QImode)
38127 int extra = 11;
38128 if (TARGET_XOP && mode == V16QImode)
38129 extra = 5;
38130 else if (TARGET_SSSE3)
38131 extra = 6;
38132 *total = cost->fmul * 2 + cost->fabs * extra;
38134 /* V*DImode is emulated with 5-8 insns. */
38135 else if (mode == V2DImode || mode == V4DImode)
38137 if (TARGET_XOP && mode == V2DImode)
38138 *total = cost->fmul * 2 + cost->fabs * 3;
38139 else
38140 *total = cost->fmul * 3 + cost->fabs * 5;
38142 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38143 insns, including two PMULUDQ. */
38144 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38145 *total = cost->fmul * 2 + cost->fabs * 5;
38146 else
38147 *total = cost->fmul;
38148 return false;
38150 else
38152 rtx op0 = XEXP (x, 0);
38153 rtx op1 = XEXP (x, 1);
38154 int nbits;
38155 if (CONST_INT_P (XEXP (x, 1)))
38157 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38158 for (nbits = 0; value != 0; value &= value - 1)
38159 nbits++;
38161 else
38162 /* This is arbitrary. */
38163 nbits = 7;
38165 /* Compute costs correctly for widening multiplication. */
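/* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
is a single widening multiply; cost it by the narrower inner mode
rather than by the wide result mode. */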
38166 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38167 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38168 == GET_MODE_SIZE (mode))
38170 int is_mulwiden = 0;
38171 enum machine_mode inner_mode = GET_MODE (op0);
38173 if (GET_CODE (op0) == GET_CODE (op1))
38174 is_mulwiden = 1, op1 = XEXP (op1, 0);
38175 else if (CONST_INT_P (op1))
38177 if (GET_CODE (op0) == SIGN_EXTEND)
38178 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38179 == INTVAL (op1);
38180 else
38181 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38184 if (is_mulwiden)
38185 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38188 *total = (cost->mult_init[MODE_INDEX (mode)]
38189 + nbits * cost->mult_bit
38190 + rtx_cost (op0, outer_code, opno, speed)
38191 + rtx_cost (op1, outer_code, opno, speed));
38193 return true;
38196 case DIV:
38197 case UDIV:
38198 case MOD:
38199 case UMOD:
38200 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38201 /* ??? SSE cost should be used here. */
38202 *total = cost->fdiv;
38203 else if (X87_FLOAT_MODE_P (mode))
38204 *total = cost->fdiv;
38205 else if (FLOAT_MODE_P (mode))
38206 /* ??? SSE vector cost should be used here. */
38207 *total = cost->fdiv;
38208 else
38209 *total = cost->divide[MODE_INDEX (mode)];
38210 return false;
38212 case PLUS:
38213 if (GET_MODE_CLASS (mode) == MODE_INT
38214 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38216 if (GET_CODE (XEXP (x, 0)) == PLUS
38217 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38218 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38219 && CONSTANT_P (XEXP (x, 1)))
38221 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38222 if (val == 2 || val == 4 || val == 8)
38224 *total = cost->lea;
38225 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38226 outer_code, opno, speed);
38227 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38228 outer_code, opno, speed);
38229 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38230 return true;
38233 else if (GET_CODE (XEXP (x, 0)) == MULT
38234 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38236 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38237 if (val == 2 || val == 4 || val == 8)
38239 *total = cost->lea;
38240 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38241 outer_code, opno, speed);
38242 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38243 return true;
38246 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38248 *total = cost->lea;
38249 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38250 outer_code, opno, speed);
38251 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38252 outer_code, opno, speed);
38253 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38254 return true;
38257 /* FALLTHRU */
38259 case MINUS:
38260 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38262 /* ??? SSE cost should be used here. */
38263 *total = cost->fadd;
38264 return false;
38266 else if (X87_FLOAT_MODE_P (mode))
38268 *total = cost->fadd;
38269 return false;
38271 else if (FLOAT_MODE_P (mode))
38273 /* ??? SSE vector cost should be used here. */
38274 *total = cost->fadd;
38275 return false;
38277 /* FALLTHRU */
38279 case AND:
38280 case IOR:
38281 case XOR:
38282 if (GET_MODE_CLASS (mode) == MODE_INT
38283 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38285 *total = (cost->add * 2
38286 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38287 << (GET_MODE (XEXP (x, 0)) != DImode))
38288 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38289 << (GET_MODE (XEXP (x, 1)) != DImode)));
38290 return true;
38292 /* FALLTHRU */
38294 case NEG:
38295 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38297 /* ??? SSE cost should be used here. */
38298 *total = cost->fchs;
38299 return false;
38301 else if (X87_FLOAT_MODE_P (mode))
38303 *total = cost->fchs;
38304 return false;
38306 else if (FLOAT_MODE_P (mode))
38308 /* ??? SSE vector cost should be used here. */
38309 *total = cost->fchs;
38310 return false;
38312 /* FALLTHRU */
38314 case NOT:
38315 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38317 /* ??? Should be SSE vector operation cost. */
38318 /* At least for published AMD latencies, this really is the same
38319 as the latency for a simple fpu operation like fabs. */
38320 *total = cost->fabs;
38322 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38323 *total = cost->add * 2;
38324 else
38325 *total = cost->add;
38326 return false;
38328 case COMPARE:
38329 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38330 && XEXP (XEXP (x, 0), 1) == const1_rtx
38331 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38332 && XEXP (x, 1) == const0_rtx)
38334 /* This kind of construct is implemented using test[bwl].
38335 Treat it as if we had an AND. */
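/* I.e. (compare (zero_extract (op) (const_int 1) (const_int POS))
(const_int 0)), a single-bit test. */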
38336 *total = (cost->add
38337 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38338 + rtx_cost (const1_rtx, outer_code, opno, speed));
38339 return true;
38341 return false;
38343 case FLOAT_EXTEND:
38344 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38345 *total = 0;
38346 return false;
38348 case ABS:
38349 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38350 /* ??? SSE cost should be used here. */
38351 *total = cost->fabs;
38352 else if (X87_FLOAT_MODE_P (mode))
38353 *total = cost->fabs;
38354 else if (FLOAT_MODE_P (mode))
38355 /* ??? SSE vector cost should be used here. */
38356 *total = cost->fabs;
38357 return false;
38359 case SQRT:
38360 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38361 /* ??? SSE cost should be used here. */
38362 *total = cost->fsqrt;
38363 else if (X87_FLOAT_MODE_P (mode))
38364 *total = cost->fsqrt;
38365 else if (FLOAT_MODE_P (mode))
38366 /* ??? SSE vector cost should be used here. */
38367 *total = cost->fsqrt;
38368 return false;
38370 case UNSPEC:
38371 if (XINT (x, 1) == UNSPEC_TP)
38372 *total = 0;
38373 return false;
38375 case VEC_SELECT:
38376 case VEC_CONCAT:
38377 case VEC_DUPLICATE:
38378 /* ??? Assume all of these vector manipulation patterns are
38379 recognizable, in which case they all pretty much have the
38380 same cost. */
38381 *total = cost->fabs;
38382 return true;
38383 case VEC_MERGE:
38384 mask = XEXP (x, 2);
38385 /* This is a masked instruction; assume the same cost
38386 as the nonmasked variant. */
38387 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38388 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38389 else
38390 *total = cost->fabs;
38391 return true;
38393 default:
38394 return false;
38398 #if TARGET_MACHO
38400 static int current_machopic_label_num;
38402 /* Given a symbol name and its associated stub, write out the
38403 definition of the stub. */
38405 void
38406 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38408 unsigned int length;
38409 char *binder_name, *symbol_name, lazy_ptr_name[32];
38410 int label = ++current_machopic_label_num;
38412 /* For 64-bit we shouldn't get here. */
38413 gcc_assert (!TARGET_64BIT);
38415 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38416 symb = targetm.strip_name_encoding (symb);
38418 length = strlen (stub);
38419 binder_name = XALLOCAVEC (char, length + 32);
38420 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38422 length = strlen (symb);
38423 symbol_name = XALLOCAVEC (char, length + 32);
38424 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38426 sprintf (lazy_ptr_name, "L%d$lz", label);
38428 if (MACHOPIC_ATT_STUB)
38429 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38430 else if (MACHOPIC_PURE)
38431 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38432 else
38433 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38435 fprintf (file, "%s:\n", stub);
38436 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38438 if (MACHOPIC_ATT_STUB)
38440 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38442 else if (MACHOPIC_PURE)
38444 /* PIC stub. */
38445 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38446 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38447 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38448 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38449 label, lazy_ptr_name, label);
38450 fprintf (file, "\tjmp\t*%%ecx\n");
38452 else
38453 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38455 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38456 it needs no stub-binding-helper. */
38457 if (MACHOPIC_ATT_STUB)
38458 return;
38460 fprintf (file, "%s:\n", binder_name);
38462 if (MACHOPIC_PURE)
38464 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38465 fprintf (file, "\tpushl\t%%ecx\n");
38467 else
38468 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38470 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38472 /* N.B. Keep the correspondence of these
38473 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38474 old-pic/new-pic/non-pic stubs; altering this will break
38475 compatibility with existing dylibs. */
38476 if (MACHOPIC_PURE)
38478 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38479 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38481 else
38482 /* 16-byte -mdynamic-no-pic stub. */
38483 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38485 fprintf (file, "%s:\n", lazy_ptr_name);
38486 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38487 fprintf (file, ASM_LONG "%s\n", binder_name);
38489 #endif /* TARGET_MACHO */
38491 /* Order the registers for register allocator. */
38493 void
38494 x86_order_regs_for_local_alloc (void)
38496 int pos = 0;
38497 int i;
38499 /* First allocate the local general purpose registers. */
38500 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38501 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38502 reg_alloc_order [pos++] = i;
38504 /* Global general purpose registers. */
38505 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38506 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38507 reg_alloc_order [pos++] = i;
38509 /* x87 registers come first in case we are doing FP math
38510 using them. */
38511 if (!TARGET_SSE_MATH)
38512 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38513 reg_alloc_order [pos++] = i;
38515 /* SSE registers. */
38516 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38517 reg_alloc_order [pos++] = i;
38518 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38519 reg_alloc_order [pos++] = i;
38521 /* Extended REX SSE registers. */
38522 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38523 reg_alloc_order [pos++] = i;
38525 /* Mask register. */
38526 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38527 reg_alloc_order [pos++] = i;
38529 /* x87 registers. */
38530 if (TARGET_SSE_MATH)
38531 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38532 reg_alloc_order [pos++] = i;
38534 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38535 reg_alloc_order [pos++] = i;
38537 /* Initialize the rest of the array, as we do not allocate some registers
38538 at all. */
38539 while (pos < FIRST_PSEUDO_REGISTER)
38540 reg_alloc_order [pos++] = 0;
38543 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38544 in struct attribute_spec.handler. */
38545 static tree
38546 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38547 tree args,
38548 int flags ATTRIBUTE_UNUSED,
38549 bool *no_add_attrs)
38551 if (TREE_CODE (*node) != FUNCTION_TYPE
38552 && TREE_CODE (*node) != METHOD_TYPE
38553 && TREE_CODE (*node) != FIELD_DECL
38554 && TREE_CODE (*node) != TYPE_DECL)
38556 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38557 name);
38558 *no_add_attrs = true;
38559 return NULL_TREE;
38561 if (TARGET_64BIT)
38563 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38564 name);
38565 *no_add_attrs = true;
38566 return NULL_TREE;
38568 if (is_attribute_p ("callee_pop_aggregate_return", name))
38570 tree cst;
38572 cst = TREE_VALUE (args);
38573 if (TREE_CODE (cst) != INTEGER_CST)
38575 warning (OPT_Wattributes,
38576 "%qE attribute requires an integer constant argument",
38577 name);
38578 *no_add_attrs = true;
38580 else if (compare_tree_int (cst, 0) != 0
38581 && compare_tree_int (cst, 1) != 0)
38583 warning (OPT_Wattributes,
38584 "argument to %qE attribute is neither zero, nor one",
38585 name);
38586 *no_add_attrs = true;
38589 return NULL_TREE;
38592 return NULL_TREE;
38595 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38596 struct attribute_spec.handler. */
38597 static tree
38598 ix86_handle_abi_attribute (tree *node, tree name,
38599 tree args ATTRIBUTE_UNUSED,
38600 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38602 if (TREE_CODE (*node) != FUNCTION_TYPE
38603 && TREE_CODE (*node) != METHOD_TYPE
38604 && TREE_CODE (*node) != FIELD_DECL
38605 && TREE_CODE (*node) != TYPE_DECL)
38607 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38608 name);
38609 *no_add_attrs = true;
38610 return NULL_TREE;
38613 /* Can combine regparm with all attributes but fastcall. */
38614 if (is_attribute_p ("ms_abi", name))
38616 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38618 error ("ms_abi and sysv_abi attributes are not compatible");
38621 return NULL_TREE;
38623 else if (is_attribute_p ("sysv_abi", name))
38625 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38627 error ("ms_abi and sysv_abi attributes are not compatible");
38630 return NULL_TREE;
38633 return NULL_TREE;
38636 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38637 struct attribute_spec.handler. */
38638 static tree
38639 ix86_handle_struct_attribute (tree *node, tree name,
38640 tree args ATTRIBUTE_UNUSED,
38641 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38643 tree *type = NULL;
38644 if (DECL_P (*node))
38646 if (TREE_CODE (*node) == TYPE_DECL)
38647 type = &TREE_TYPE (*node);
38649 else
38650 type = node;
38652 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38654 warning (OPT_Wattributes, "%qE attribute ignored",
38655 name);
38656 *no_add_attrs = true;
38659 else if ((is_attribute_p ("ms_struct", name)
38660 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38661 || ((is_attribute_p ("gcc_struct", name)
38662 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38664 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38665 name);
38666 *no_add_attrs = true;
38669 return NULL_TREE;
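/* Handle an attribute that may only be applied to a function declaration;
arguments as in struct attribute_spec.handler. */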
38672 static tree
38673 ix86_handle_fndecl_attribute (tree *node, tree name,
38674 tree args ATTRIBUTE_UNUSED,
38675 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38677 if (TREE_CODE (*node) != FUNCTION_DECL)
38679 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38680 name);
38681 *no_add_attrs = true;
38683 return NULL_TREE;
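/* Return true if bit-fields in RECORD_TYPE should use the MS layout rules:
either -mms-bitfields is in effect and the type does not carry the
gcc_struct attribute, or the type carries the ms_struct attribute. */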
38686 static bool
38687 ix86_ms_bitfield_layout_p (const_tree record_type)
38689 return ((TARGET_MS_BITFIELD_LAYOUT
38690 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38691 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38694 /* Returns an expression indicating where the this parameter is
38695 located on entry to the FUNCTION. */
38697 static rtx
38698 x86_this_parameter (tree function)
38700 tree type = TREE_TYPE (function);
38701 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38702 int nregs;
38704 if (TARGET_64BIT)
38706 const int *parm_regs;
38708 if (ix86_function_type_abi (type) == MS_ABI)
38709 parm_regs = x86_64_ms_abi_int_parameter_registers;
38710 else
38711 parm_regs = x86_64_int_parameter_registers;
38712 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38715 nregs = ix86_function_regparm (type, function);
38717 if (nregs > 0 && !stdarg_p (type))
38719 int regno;
38720 unsigned int ccvt = ix86_get_callcvt (type);
38722 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38723 regno = aggr ? DX_REG : CX_REG;
38724 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38726 regno = CX_REG;
38727 if (aggr)
38728 return gen_rtx_MEM (SImode,
38729 plus_constant (Pmode, stack_pointer_rtx, 4));
38731 else
38733 regno = AX_REG;
38734 if (aggr)
38736 regno = DX_REG;
38737 if (nregs == 1)
38738 return gen_rtx_MEM (SImode,
38739 plus_constant (Pmode,
38740 stack_pointer_rtx, 4));
38743 return gen_rtx_REG (SImode, regno);
38746 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38747 aggr ? 8 : 4));
38750 /* Determine whether x86_output_mi_thunk can succeed. */
38752 static bool
38753 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38754 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38755 HOST_WIDE_INT vcall_offset, const_tree function)
38757 /* 64-bit can handle anything. */
38758 if (TARGET_64BIT)
38759 return true;
38761 /* For 32-bit, everything's fine if we have one free register. */
38762 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38763 return true;
38765 /* Need a free register for vcall_offset. */
38766 if (vcall_offset)
38767 return false;
38769 /* Need a free register for GOT references. */
38770 if (flag_pic && !targetm.binds_local_p (function))
38771 return false;
38773 /* Otherwise ok. */
38774 return true;
38777 /* Output the assembler code for a thunk function. THUNK_DECL is the
38778 declaration for the thunk function itself, FUNCTION is the decl for
38779 the target function. DELTA is an immediate constant offset to be
38780 added to THIS. If VCALL_OFFSET is nonzero, the word at
38781 *(*this + vcall_offset) should be added to THIS. */
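/* In effect the thunk adjusts the incoming this pointer by DELTA (and,
when VCALL_OFFSET is nonzero, by a value loaded through the vtable)
and then tail-calls FUNCTION. */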
38783 static void
38784 x86_output_mi_thunk (FILE *file,
38785 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38786 HOST_WIDE_INT vcall_offset, tree function)
38788 rtx this_param = x86_this_parameter (function);
38789 rtx this_reg, tmp, fnaddr;
38790 unsigned int tmp_regno;
38792 if (TARGET_64BIT)
38793 tmp_regno = R10_REG;
38794 else
38796 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38797 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38798 tmp_regno = AX_REG;
38799 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38800 tmp_regno = DX_REG;
38801 else
38802 tmp_regno = CX_REG;
38805 emit_note (NOTE_INSN_PROLOGUE_END);
38807 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38808 pull it in now and let DELTA benefit. */
38809 if (REG_P (this_param))
38810 this_reg = this_param;
38811 else if (vcall_offset)
38813 /* Put the this parameter into %eax. */
38814 this_reg = gen_rtx_REG (Pmode, AX_REG);
38815 emit_move_insn (this_reg, this_param);
38817 else
38818 this_reg = NULL_RTX;
38820 /* Adjust the this parameter by a fixed constant. */
38821 if (delta)
38823 rtx delta_rtx = GEN_INT (delta);
38824 rtx delta_dst = this_reg ? this_reg : this_param;
38826 if (TARGET_64BIT)
38828 if (!x86_64_general_operand (delta_rtx, Pmode))
38830 tmp = gen_rtx_REG (Pmode, tmp_regno);
38831 emit_move_insn (tmp, delta_rtx);
38832 delta_rtx = tmp;
38836 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38839 /* Adjust the this parameter by a value stored in the vtable. */
38840 if (vcall_offset)
38842 rtx vcall_addr, vcall_mem, this_mem;
38844 tmp = gen_rtx_REG (Pmode, tmp_regno);
38846 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38847 if (Pmode != ptr_mode)
38848 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38849 emit_move_insn (tmp, this_mem);
38851 /* Adjust the this parameter. */
38852 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38853 if (TARGET_64BIT
38854 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38856 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38857 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38858 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38861 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38862 if (Pmode != ptr_mode)
38863 emit_insn (gen_addsi_1_zext (this_reg,
38864 gen_rtx_REG (ptr_mode,
38865 REGNO (this_reg)),
38866 vcall_mem));
38867 else
38868 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38871 /* If necessary, drop THIS back to its stack slot. */
38872 if (this_reg && this_reg != this_param)
38873 emit_move_insn (this_param, this_reg);
38875 fnaddr = XEXP (DECL_RTL (function), 0);
38876 if (TARGET_64BIT)
38878 if (!flag_pic || targetm.binds_local_p (function)
38879 || TARGET_PECOFF)
38881 else
38883 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38884 tmp = gen_rtx_CONST (Pmode, tmp);
38885 fnaddr = gen_const_mem (Pmode, tmp);
38888 else
38890 if (!flag_pic || targetm.binds_local_p (function))
38892 #if TARGET_MACHO
38893 else if (TARGET_MACHO)
38895 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38896 fnaddr = XEXP (fnaddr, 0);
38898 #endif /* TARGET_MACHO */
38899 else
38901 tmp = gen_rtx_REG (Pmode, CX_REG);
38902 output_set_got (tmp, NULL_RTX);
38904 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38905 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38906 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38907 fnaddr = gen_const_mem (Pmode, fnaddr);
38911 /* Our sibling call patterns do not allow memories, because we have no
38912 predicate that can distinguish between frame and non-frame memory.
38913 For our purposes here, we can get away with (ab)using a jump pattern,
38914 because we're going to do no optimization. */
38915 if (MEM_P (fnaddr))
38916 emit_jump_insn (gen_indirect_jump (fnaddr));
38917 else
38919 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38920 fnaddr = legitimize_pic_address (fnaddr,
38921 gen_rtx_REG (Pmode, tmp_regno));
38923 if (!sibcall_insn_operand (fnaddr, word_mode))
38925 tmp = gen_rtx_REG (word_mode, tmp_regno);
38926 if (GET_MODE (fnaddr) != word_mode)
38927 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38928 emit_move_insn (tmp, fnaddr);
38929 fnaddr = tmp;
38932 tmp = gen_rtx_MEM (QImode, fnaddr);
38933 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38934 tmp = emit_call_insn (tmp);
38935 SIBLING_CALL_P (tmp) = 1;
38937 emit_barrier ();
38939 /* Emit just enough of rest_of_compilation to get the insns emitted.
38940 Note that use_thunk calls assemble_start_function et al. */
38941 tmp = get_insns ();
38942 shorten_branches (tmp);
38943 final_start_function (tmp, file, 1);
38944 final (tmp, file, 1);
38945 final_end_function ();
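/* Output assembler directives that belong at the very start of the
output file. */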
38948 static void
38949 x86_file_start (void)
38951 default_file_start ();
38952 if (TARGET_16BIT)
38953 fputs ("\t.code16gcc\n", asm_out_file);
38954 #if TARGET_MACHO
38955 darwin_file_start ();
38956 #endif
38957 if (X86_FILE_START_VERSION_DIRECTIVE)
38958 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38959 if (X86_FILE_START_FLTUSED)
38960 fputs ("\t.global\t__fltused\n", asm_out_file);
38961 if (ix86_asm_dialect == ASM_INTEL)
38962 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
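/* Compute the alignment to use for FIELD, given COMPUTED, the alignment
determined by the front end. Outside 64-bit mode and without
-malign-double, cap double, complex-double and integer fields at
32-bit alignment. */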
38966 x86_field_alignment (tree field, int computed)
38968 enum machine_mode mode;
38969 tree type = TREE_TYPE (field);
38971 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38972 return computed;
38973 mode = TYPE_MODE (strip_array_types (type));
38974 if (mode == DFmode || mode == DCmode
38975 || GET_MODE_CLASS (mode) == MODE_INT
38976 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38977 return MIN (32, computed);
38978 return computed;
38981 /* Output assembler code to FILE to increment profiler label # LABELNO
38982 for profiling a function entry. */
38983 void
38984 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38986 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38987 : MCOUNT_NAME);
38989 if (TARGET_64BIT)
38991 #ifndef NO_PROFILE_COUNTERS
38992 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38993 #endif
38995 if (!TARGET_PECOFF && flag_pic)
38996 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38997 else
38998 fprintf (file, "\tcall\t%s\n", mcount_name);
39000 else if (flag_pic)
39002 #ifndef NO_PROFILE_COUNTERS
39003 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39004 LPREFIX, labelno);
39005 #endif
39006 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39008 else
39010 #ifndef NO_PROFILE_COUNTERS
39011 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39012 LPREFIX, labelno);
39013 #endif
39014 fprintf (file, "\tcall\t%s\n", mcount_name);
39018 /* We don't have exact information about the insn sizes, but we may assume
39019 quite safely that we are informed about all 1 byte insns and memory
39020 address sizes. This is enough to eliminate unnecessary padding in
39021 99% of cases. */
39023 static int
39024 min_insn_size (rtx insn)
39026 int l = 0, len;
39028 if (!INSN_P (insn) || !active_insn_p (insn))
39029 return 0;
39031 /* Discard alignments we've emitted and jump instructions. */
39032 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39033 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39034 return 0;
39036 /* Important case - calls are always 5 bytes.
39037 It is common to have many calls in a row. */
39038 if (CALL_P (insn)
39039 && symbolic_reference_mentioned_p (PATTERN (insn))
39040 && !SIBLING_CALL_P (insn))
39041 return 5;
39042 len = get_attr_length (insn);
39043 if (len <= 1)
39044 return 1;
39046 /* For normal instructions we rely on get_attr_length being exact,
39047 with a few exceptions. */
39048 if (!JUMP_P (insn))
39050 enum attr_type type = get_attr_type (insn);
39052 switch (type)
39054 case TYPE_MULTI:
39055 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39056 || asm_noperands (PATTERN (insn)) >= 0)
39057 return 0;
39058 break;
39059 case TYPE_OTHER:
39060 case TYPE_FCMP:
39061 break;
39062 default:
39063 /* Otherwise trust get_attr_length. */
39064 return len;
39067 l = get_attr_length_address (insn);
39068 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39069 l = 4;
39071 if (l)
39072 return 1+l;
39073 else
39074 return 2;
39077 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39079 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39080 window. */
39082 static void
39083 ix86_avoid_jump_mispredicts (void)
39085 rtx insn, start = get_insns ();
39086 int nbytes = 0, njumps = 0;
39087 int isjump = 0;
39089 /* Look for all minimal intervals of instructions containing 4 jumps.
39090 The intervals are bounded by START and INSN. NBYTES is the total
39091 size of instructions in the interval including INSN and not including
39092 START. When NBYTES is smaller than 16, it is possible
39093 that the end of START and INSN end up in the same 16-byte page.
39095 The smallest offset in the page at which INSN can start is the case where
39096 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39097 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39099 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39100 have to, since control transfer to its label(s) can be performed through
39101 other means; also, we estimate the minimum length of all asm stmts as 0. */
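/* E.g. with NBYTES == 12 and sizeof (INSN) == 2, the pad emitted below
allows up to 15 - 12 + 2 = 5 bytes of padding, enough to keep INSN
(the fourth jump) out of the 16-byte window holding the previous three. */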
39102 for (insn = start; insn; insn = NEXT_INSN (insn))
39104 int min_size;
39106 if (LABEL_P (insn))
39108 int align = label_to_alignment (insn);
39109 int max_skip = label_to_max_skip (insn);
39111 if (max_skip > 15)
39112 max_skip = 15;
39113 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39114 already in the current 16 byte page, because otherwise
39115 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39116 bytes to reach 16 byte boundary. */
39117 if (align <= 0
39118 || (align <= 3 && max_skip != (1 << align) - 1))
39119 max_skip = 0;
39120 if (dump_file)
39121 fprintf (dump_file, "Label %i with max_skip %i\n",
39122 INSN_UID (insn), max_skip);
39123 if (max_skip)
39125 while (nbytes + max_skip >= 16)
39127 start = NEXT_INSN (start);
39128 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39129 || CALL_P (start))
39130 njumps--, isjump = 1;
39131 else
39132 isjump = 0;
39133 nbytes -= min_insn_size (start);
39136 continue;
39139 min_size = min_insn_size (insn);
39140 nbytes += min_size;
39141 if (dump_file)
39142 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39143 INSN_UID (insn), min_size);
39144 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39145 || CALL_P (insn))
39146 njumps++;
39147 else
39148 continue;
39150 while (njumps > 3)
39152 start = NEXT_INSN (start);
39153 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39154 || CALL_P (start))
39155 njumps--, isjump = 1;
39156 else
39157 isjump = 0;
39158 nbytes -= min_insn_size (start);
39160 gcc_assert (njumps >= 0);
39161 if (dump_file)
39162 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39163 INSN_UID (start), INSN_UID (insn), nbytes);
39165 if (njumps == 3 && isjump && nbytes < 16)
39167 int padsize = 15 - nbytes + min_insn_size (insn);
39169 if (dump_file)
39170 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39171 INSN_UID (insn), padsize);
39172 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39176 #endif
39178 /* AMD Athlon works faster
39179 when RET is not the destination of a conditional jump or directly preceded
39180 by another jump instruction. We avoid the penalty by inserting a NOP just
39181 before the RET instruction in such cases. */
39182 static void
39183 ix86_pad_returns (void)
39185 edge e;
39186 edge_iterator ei;
39188 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39190 basic_block bb = e->src;
39191 rtx ret = BB_END (bb);
39192 rtx prev;
39193 bool replace = false;
39195 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39196 || optimize_bb_for_size_p (bb))
39197 continue;
39198 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39199 if (active_insn_p (prev) || LABEL_P (prev))
39200 break;
39201 if (prev && LABEL_P (prev))
39203 edge e;
39204 edge_iterator ei;
39206 FOR_EACH_EDGE (e, ei, bb->preds)
39207 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39208 && !(e->flags & EDGE_FALLTHRU))
39210 replace = true;
39211 break;
39214 if (!replace)
39216 prev = prev_active_insn (ret);
39217 if (prev
39218 && ((JUMP_P (prev) && any_condjump_p (prev))
39219 || CALL_P (prev)))
39220 replace = true;
39221 /* Empty functions get a branch mispredict even when
39222 the jump destination is not visible to us. */
39223 if (!prev && !optimize_function_for_size_p (cfun))
39224 replace = true;
39226 if (replace)
39228 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39229 delete_insn (ret);
39234 /* Count the minimum number of instructions in BB. Return 4 if the
39235 number of instructions >= 4. */
39237 static int
39238 ix86_count_insn_bb (basic_block bb)
39240 rtx insn;
39241 int insn_count = 0;
39243 /* Count number of instructions in this block. Return 4 if the number
39244 of instructions >= 4. */
39245 FOR_BB_INSNS (bb, insn)
39247 /* This only happens in exit blocks. */
39248 if (JUMP_P (insn)
39249 && ANY_RETURN_P (PATTERN (insn)))
39250 break;
39252 if (NONDEBUG_INSN_P (insn)
39253 && GET_CODE (PATTERN (insn)) != USE
39254 && GET_CODE (PATTERN (insn)) != CLOBBER)
39256 insn_count++;
39257 if (insn_count >= 4)
39258 return insn_count;
39262 return insn_count;
39266 /* Count the minimum number of instructions in code path in BB.
39267 Return 4 if the number of instructions >= 4. */
39269 static int
39270 ix86_count_insn (basic_block bb)
39272 edge e;
39273 edge_iterator ei;
39274 int min_prev_count;
39276 /* Only bother counting instructions along paths with no
39277 more than 2 basic blocks between entry and exit. Given
39278 that BB has an edge to exit, determine if a predecessor
39279 of BB has an edge from entry. If so, compute the number
39280 of instructions in the predecessor block. If there
39281 happen to be multiple such blocks, compute the minimum. */
39282 min_prev_count = 4;
39283 FOR_EACH_EDGE (e, ei, bb->preds)
39285 edge prev_e;
39286 edge_iterator prev_ei;
39288 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39290 min_prev_count = 0;
39291 break;
39293 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39295 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39297 int count = ix86_count_insn_bb (e->src);
39298 if (count < min_prev_count)
39299 min_prev_count = count;
39300 break;
39305 if (min_prev_count < 4)
39306 min_prev_count += ix86_count_insn_bb (bb);
39308 return min_prev_count;
39311 /* Pad short function to 4 instructions. */
39313 static void
39314 ix86_pad_short_function (void)
39316 edge e;
39317 edge_iterator ei;
39319 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39321 rtx ret = BB_END (e->src);
39322 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39324 int insn_count = ix86_count_insn (e->src);
39326 /* Pad short function. */
39327 if (insn_count < 4)
39329 rtx insn = ret;
39331 /* Find epilogue. */
39332 while (insn
39333 && (!NOTE_P (insn)
39334 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39335 insn = PREV_INSN (insn);
39337 if (!insn)
39338 insn = ret;
39340 /* Two NOPs count as one instruction. */
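/* E.g. a function with a single insn gets 2 * (4 - 1) = 6 NOPs, which
count as the 3 extra instructions needed to reach 4. */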
39341 insn_count = 2 * (4 - insn_count);
39342 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39348 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39349 the epilogue, the Windows system unwinder will apply epilogue logic and
39350 produce incorrect offsets. This can be avoided by adding a nop between
39351 the last insn that can throw and the first insn of the epilogue. */
39353 static void
39354 ix86_seh_fixup_eh_fallthru (void)
39356 edge e;
39357 edge_iterator ei;
39359 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39361 rtx insn, next;
39363 /* Find the beginning of the epilogue. */
39364 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39365 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39366 break;
39367 if (insn == NULL)
39368 continue;
39370 /* We only care about preceding insns that can throw. */
39371 insn = prev_active_insn (insn);
39372 if (insn == NULL || !can_throw_internal (insn))
39373 continue;
39375 /* Do not separate calls from their debug information. */
39376 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39377 if (NOTE_P (next)
39378 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39379 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39380 insn = next;
39381 else
39382 break;
39384 emit_insn_after (gen_nops (const1_rtx), insn);
39388 /* Implement machine specific optimizations. We implement padding of returns
39389 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39390 static void
39391 ix86_reorg (void)
39393 /* We are freeing block_for_insn in the toplev to keep compatibility
39394 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39395 compute_bb_for_insn ();
39397 if (TARGET_SEH && current_function_has_exception_handlers ())
39398 ix86_seh_fixup_eh_fallthru ();
39400 if (optimize && optimize_function_for_speed_p (cfun))
39402 if (TARGET_PAD_SHORT_FUNCTION)
39403 ix86_pad_short_function ();
39404 else if (TARGET_PAD_RETURNS)
39405 ix86_pad_returns ();
39406 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39407 if (TARGET_FOUR_JUMP_LIMIT)
39408 ix86_avoid_jump_mispredicts ();
39409 #endif
39413 /* Return nonzero when a QImode register that must be represented via a REX
39414 prefix is used. */
39415 bool
39416 x86_extended_QIreg_mentioned_p (rtx insn)
39418 int i;
39419 extract_insn_cached (insn);
39420 for (i = 0; i < recog_data.n_operands; i++)
39421 if (GENERAL_REG_P (recog_data.operand[i])
39422 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39423 return true;
39424 return false;
39427 /* Return nonzero when P points to a register encoded via a REX prefix.
39428 Called via for_each_rtx. */
39429 static int
39430 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39432 unsigned int regno;
39433 if (!REG_P (*p))
39434 return 0;
39435 regno = REGNO (*p);
39436 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39439 /* Return true when INSN mentions a register that must be encoded using a
39440 REX prefix. */
39441 bool
39442 x86_extended_reg_mentioned_p (rtx insn)
39444 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39445 extended_reg_mentioned_1, NULL);
39448 /* If profitable, negate (without causing overflow) integer constant
39449 of mode MODE at location LOC. Return true in this case. */
39450 bool
39451 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39453 HOST_WIDE_INT val;
39455 if (!CONST_INT_P (*loc))
39456 return false;
39458 switch (mode)
39460 case DImode:
39461 /* DImode x86_64 constants must fit in 32 bits. */
39462 gcc_assert (x86_64_immediate_operand (*loc, mode));
39464 mode = SImode;
39465 break;
39467 case SImode:
39468 case HImode:
39469 case QImode:
39470 break;
39472 default:
39473 gcc_unreachable ();
39476 /* Avoid overflows. */
39477 if (mode_signbit_p (mode, *loc))
39478 return false;
39480 val = INTVAL (*loc);
39482 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39483 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39484 if ((val < 0 && val != -128)
39485 || val == 128)
39487 *loc = GEN_INT (-val);
39488 return true;
39491 return false;
39494 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39495 optabs would emit if we didn't have TFmode patterns. */
39497 void
39498 x86_emit_floatuns (rtx operands[2])
39500 rtx neglab, donelab, i0, i1, f0, in, out;
39501 enum machine_mode mode, inmode;
39503 inmode = GET_MODE (operands[1]);
39504 gcc_assert (inmode == SImode || inmode == DImode);
39506 out = operands[0];
39507 in = force_reg (inmode, operands[1]);
39508 mode = GET_MODE (out);
39509 neglab = gen_label_rtx ();
39510 donelab = gen_label_rtx ();
39511 f0 = gen_reg_rtx (mode);
39513 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39515 expand_float (out, in, 0);
39517 emit_jump_insn (gen_jump (donelab));
39518 emit_barrier ();
39520 emit_label (neglab);
39522 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39523 1, OPTAB_DIRECT);
39524 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39525 1, OPTAB_DIRECT);
39526 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39528 expand_float (f0, i0, 0);
39530 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39532 emit_label (donelab);
39535 /* AVX512F does support 64-byte integer vector operations,
39536 thus the longest vector we are faced with is V64QImode. */
39537 #define MAX_VECT_LEN 64
39539 struct expand_vec_perm_d
39541 rtx target, op0, op1;
39542 unsigned char perm[MAX_VECT_LEN];
39543 enum machine_mode vmode;
39544 unsigned char nelt;
39545 bool one_operand_p;
39546 bool testing_p;
39549 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39550 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39551 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39553 /* Get a vector mode of the same size as the original but with elements
39554 twice as wide. This is only guaranteed to apply to integral vectors. */
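/* E.g. V16QImode yields V8HImode and V8HImode yields V4SImode. */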
39556 static inline enum machine_mode
39557 get_mode_wider_vector (enum machine_mode o)
39559 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39560 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39561 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39562 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39563 return n;
39566 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39567 fill target with val via vec_duplicate. */
39569 static bool
39570 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39572 bool ok;
39573 rtx insn, dup;
39575 /* First attempt to recognize VAL as-is. */
39576 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39577 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39578 if (recog_memoized (insn) < 0)
39580 rtx seq;
39581 /* If that fails, force VAL into a register. */
39583 start_sequence ();
39584 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39585 seq = get_insns ();
39586 end_sequence ();
39587 if (seq)
39588 emit_insn_before (seq, insn);
39590 ok = recog_memoized (insn) >= 0;
39591 gcc_assert (ok);
39593 return true;
39596 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39597 with all elements equal to VAR. Return true if successful. */
39599 static bool
39600 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39601 rtx target, rtx val)
39603 bool ok;
39605 switch (mode)
39607 case V2SImode:
39608 case V2SFmode:
39609 if (!mmx_ok)
39610 return false;
39611 /* FALLTHRU */
39613 case V4DFmode:
39614 case V4DImode:
39615 case V8SFmode:
39616 case V8SImode:
39617 case V2DFmode:
39618 case V2DImode:
39619 case V4SFmode:
39620 case V4SImode:
39621 case V16SImode:
39622 case V8DImode:
39623 case V16SFmode:
39624 case V8DFmode:
39625 return ix86_vector_duplicate_value (mode, target, val);
39627 case V4HImode:
39628 if (!mmx_ok)
39629 return false;
39630 if (TARGET_SSE || TARGET_3DNOW_A)
39632 rtx x;
39634 val = gen_lowpart (SImode, val);
39635 x = gen_rtx_TRUNCATE (HImode, val);
39636 x = gen_rtx_VEC_DUPLICATE (mode, x);
39637 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39638 return true;
39640 goto widen;
39642 case V8QImode:
39643 if (!mmx_ok)
39644 return false;
39645 goto widen;
39647 case V8HImode:
39648 if (TARGET_SSE2)
39650 struct expand_vec_perm_d dperm;
39651 rtx tmp1, tmp2;
39653 permute:
39654 memset (&dperm, 0, sizeof (dperm));
39655 dperm.target = target;
39656 dperm.vmode = mode;
39657 dperm.nelt = GET_MODE_NUNITS (mode);
39658 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39659 dperm.one_operand_p = true;
39661 /* Extend to SImode using a paradoxical SUBREG. */
39662 tmp1 = gen_reg_rtx (SImode);
39663 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39665 /* Insert the SImode value as low element of a V4SImode vector. */
39666 tmp2 = gen_reg_rtx (V4SImode);
39667 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39668 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39670 ok = (expand_vec_perm_1 (&dperm)
39671 || expand_vec_perm_broadcast_1 (&dperm));
39672 gcc_assert (ok);
39673 return ok;
39675 goto widen;
39677 case V16QImode:
39678 if (TARGET_SSE2)
39679 goto permute;
39680 goto widen;
39682 widen:
39683 /* Replicate the value once into the next wider mode and recurse. */
39685 enum machine_mode smode, wsmode, wvmode;
39686 rtx x;
39688 smode = GET_MODE_INNER (mode);
39689 wvmode = get_mode_wider_vector (mode);
39690 wsmode = GET_MODE_INNER (wvmode);
39692 val = convert_modes (wsmode, smode, val, true);
39693 x = expand_simple_binop (wsmode, ASHIFT, val,
39694 GEN_INT (GET_MODE_BITSIZE (smode)),
39695 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39696 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39698 x = gen_reg_rtx (wvmode);
39699 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39700 gcc_assert (ok);
39701 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39702 return ok;
39705 case V16HImode:
39706 case V32QImode:
39708 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39709 rtx x = gen_reg_rtx (hvmode);
39711 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39712 gcc_assert (ok);
39714 x = gen_rtx_VEC_CONCAT (mode, x, x);
39715 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39717 return true;
39719 default:
39720 return false;
39724 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39725 whose ONE_VAR element is VAR, and other elements are zero. Return true
39726 if successful. */
39728 static bool
39729 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39730 rtx target, rtx var, int one_var)
39732 enum machine_mode vsimode;
39733 rtx new_target;
39734 rtx x, tmp;
39735 bool use_vector_set = false;
39737 switch (mode)
39739 case V2DImode:
39740 /* For SSE4.1, we normally use vector set. But if the second
39741 element is zero and inter-unit moves are OK, we use movq
39742 instead. */
39743 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39744 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39745 && one_var == 0));
39746 break;
39747 case V16QImode:
39748 case V4SImode:
39749 case V4SFmode:
39750 use_vector_set = TARGET_SSE4_1;
39751 break;
39752 case V8HImode:
39753 use_vector_set = TARGET_SSE2;
39754 break;
39755 case V4HImode:
39756 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39757 break;
39758 case V32QImode:
39759 case V16HImode:
39760 case V8SImode:
39761 case V8SFmode:
39762 case V4DFmode:
39763 use_vector_set = TARGET_AVX;
39764 break;
39765 case V4DImode:
39766 /* Use ix86_expand_vector_set in 64bit mode only. */
39767 use_vector_set = TARGET_AVX && TARGET_64BIT;
39768 break;
39769 default:
39770 break;
39773 if (use_vector_set)
39775 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39776 var = force_reg (GET_MODE_INNER (mode), var);
39777 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39778 return true;
39781 switch (mode)
39783 case V2SFmode:
39784 case V2SImode:
39785 if (!mmx_ok)
39786 return false;
39787 /* FALLTHRU */
39789 case V2DFmode:
39790 case V2DImode:
39791 if (one_var != 0)
39792 return false;
39793 var = force_reg (GET_MODE_INNER (mode), var);
39794 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39795 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39796 return true;
39798 case V4SFmode:
39799 case V4SImode:
39800 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39801 new_target = gen_reg_rtx (mode);
39802 else
39803 new_target = target;
39804 var = force_reg (GET_MODE_INNER (mode), var);
39805 x = gen_rtx_VEC_DUPLICATE (mode, var);
39806 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39807 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39808 if (one_var != 0)
39810 /* We need to shuffle the value to the correct position, so
39811 create a new pseudo to store the intermediate result. */
39813 /* With SSE2, we can use the integer shuffle insns. */
39814 if (mode != V4SFmode && TARGET_SSE2)
39816 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39817 const1_rtx,
39818 GEN_INT (one_var == 1 ? 0 : 1),
39819 GEN_INT (one_var == 2 ? 0 : 1),
39820 GEN_INT (one_var == 3 ? 0 : 1)));
39821 if (target != new_target)
39822 emit_move_insn (target, new_target);
39823 return true;
39826 /* Otherwise convert the intermediate result to V4SFmode and
39827 use the SSE1 shuffle instructions. */
39828 if (mode != V4SFmode)
39830 tmp = gen_reg_rtx (V4SFmode);
39831 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39833 else
39834 tmp = new_target;
39836 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39837 const1_rtx,
39838 GEN_INT (one_var == 1 ? 0 : 1),
39839 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39840 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39842 if (mode != V4SFmode)
39843 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39844 else if (tmp != target)
39845 emit_move_insn (target, tmp);
39847 else if (target != new_target)
39848 emit_move_insn (target, new_target);
39849 return true;
39851 case V8HImode:
39852 case V16QImode:
39853 vsimode = V4SImode;
39854 goto widen;
39855 case V4HImode:
39856 case V8QImode:
39857 if (!mmx_ok)
39858 return false;
39859 vsimode = V2SImode;
39860 goto widen;
39861 widen:
39862 if (one_var != 0)
39863 return false;
39865 /* Zero extend the variable element to SImode and recurse. */
39866 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39868 x = gen_reg_rtx (vsimode);
39869 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39870 var, one_var))
39871 gcc_unreachable ();
39873 emit_move_insn (target, gen_lowpart (mode, x));
39874 return true;
39876 default:
39877 return false;
39881 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39882 consisting of the values in VALS. It is known that all elements
39883 except ONE_VAR are constants. Return true if successful. */
39885 static bool
39886 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39887 rtx target, rtx vals, int one_var)
39889 rtx var = XVECEXP (vals, 0, one_var);
39890 enum machine_mode wmode;
39891 rtx const_vec, x;
39893 const_vec = copy_rtx (vals);
39894 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39895 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39897 switch (mode)
39899 case V2DFmode:
39900 case V2DImode:
39901 case V2SFmode:
39902 case V2SImode:
39903 /* For the two element vectors, it's just as easy to use
39904 the general case. */
39905 return false;
39907 case V4DImode:
39908 /* Use ix86_expand_vector_set in 64bit mode only. */
39909 if (!TARGET_64BIT)
39910 return false;
39911 case V4DFmode:
39912 case V8SFmode:
39913 case V8SImode:
39914 case V16HImode:
39915 case V32QImode:
39916 case V4SFmode:
39917 case V4SImode:
39918 case V8HImode:
39919 case V4HImode:
39920 break;
39922 case V16QImode:
39923 if (TARGET_SSE4_1)
39924 break;
39925 wmode = V8HImode;
39926 goto widen;
39927 case V8QImode:
39928 wmode = V4HImode;
39929 goto widen;
39930 widen:
39931 /* There's no way to set one QImode entry easily. Combine
39932 the variable value with its adjacent constant value, and
39933 promote to an HImode set. */
39934 x = XVECEXP (vals, 0, one_var ^ 1);
39935 if (one_var & 1)
39937 var = convert_modes (HImode, QImode, var, true);
39938 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39939 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39940 x = GEN_INT (INTVAL (x) & 0xff);
39942 else
39944 var = convert_modes (HImode, QImode, var, true);
39945 x = gen_int_mode (INTVAL (x) << 8, HImode);
39947 if (x != const0_rtx)
39948 var = expand_simple_binop (HImode, IOR, var, x, var,
39949 1, OPTAB_LIB_WIDEN);
39951 x = gen_reg_rtx (wmode);
39952 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39953 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39955 emit_move_insn (target, gen_lowpart (mode, x));
39956 return true;
39958 default:
39959 return false;
39962 emit_move_insn (target, const_vec);
39963 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39964 return true;
39967 /* A subroutine of ix86_expand_vector_init_general. Use vector
39968 concatenate to handle the most general case: all values variable,
39969 and none identical. */
39971 static void
39972 ix86_expand_vector_init_concat (enum machine_mode mode,
39973 rtx target, rtx *ops, int n)
39975 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39976 rtx first[16], second[8], third[4];
39977 rtvec v;
39978 int i, j;
39980 switch (n)
39982 case 2:
39983 switch (mode)
39985 case V16SImode:
39986 cmode = V8SImode;
39987 break;
39988 case V16SFmode:
39989 cmode = V8SFmode;
39990 break;
39991 case V8DImode:
39992 cmode = V4DImode;
39993 break;
39994 case V8DFmode:
39995 cmode = V4DFmode;
39996 break;
39997 case V8SImode:
39998 cmode = V4SImode;
39999 break;
40000 case V8SFmode:
40001 cmode = V4SFmode;
40002 break;
40003 case V4DImode:
40004 cmode = V2DImode;
40005 break;
40006 case V4DFmode:
40007 cmode = V2DFmode;
40008 break;
40009 case V4SImode:
40010 cmode = V2SImode;
40011 break;
40012 case V4SFmode:
40013 cmode = V2SFmode;
40014 break;
40015 case V2DImode:
40016 cmode = DImode;
40017 break;
40018 case V2SImode:
40019 cmode = SImode;
40020 break;
40021 case V2DFmode:
40022 cmode = DFmode;
40023 break;
40024 case V2SFmode:
40025 cmode = SFmode;
40026 break;
40027 default:
40028 gcc_unreachable ();
40031 if (!register_operand (ops[1], cmode))
40032 ops[1] = force_reg (cmode, ops[1]);
40033 if (!register_operand (ops[0], cmode))
40034 ops[0] = force_reg (cmode, ops[0]);
40035 emit_insn (gen_rtx_SET (VOIDmode, target,
40036 gen_rtx_VEC_CONCAT (mode, ops[0],
40037 ops[1])));
40038 break;
40040 case 4:
40041 switch (mode)
40043 case V4DImode:
40044 cmode = V2DImode;
40045 break;
40046 case V4DFmode:
40047 cmode = V2DFmode;
40048 break;
40049 case V4SImode:
40050 cmode = V2SImode;
40051 break;
40052 case V4SFmode:
40053 cmode = V2SFmode;
40054 break;
40055 default:
40056 gcc_unreachable ();
40058 goto half;
40060 case 8:
40061 switch (mode)
40063 case V8DImode:
40064 cmode = V2DImode;
40065 hmode = V4DImode;
40066 break;
40067 case V8DFmode:
40068 cmode = V2DFmode;
40069 hmode = V4DFmode;
40070 break;
40071 case V8SImode:
40072 cmode = V2SImode;
40073 hmode = V4SImode;
40074 break;
40075 case V8SFmode:
40076 cmode = V2SFmode;
40077 hmode = V4SFmode;
40078 break;
40079 default:
40080 gcc_unreachable ();
40082 goto half;
40084 case 16:
40085 switch (mode)
40087 case V16SImode:
40088 cmode = V2SImode;
40089 hmode = V4SImode;
40090 gmode = V8SImode;
40091 break;
40092 case V16SFmode:
40093 cmode = V2SFmode;
40094 hmode = V4SFmode;
40095 gmode = V8SFmode;
40096 break;
40097 default:
40098 gcc_unreachable ();
40100 goto half;
40102 half:
40103 /* FIXME: We process inputs backward to help RA. PR 36222. */
40104 i = n - 1;
40105 j = (n >> 1) - 1;
40106 for (; i > 0; i -= 2, j--)
40108 first[j] = gen_reg_rtx (cmode);
40109 v = gen_rtvec (2, ops[i - 1], ops[i]);
40110 ix86_expand_vector_init (false, first[j],
40111 gen_rtx_PARALLEL (cmode, v));
40114 n >>= 1;
40115 if (n > 4)
40117 gcc_assert (hmode != VOIDmode);
40118 gcc_assert (gmode != VOIDmode);
40119 for (i = j = 0; i < n; i += 2, j++)
40121 second[j] = gen_reg_rtx (hmode);
40122 ix86_expand_vector_init_concat (hmode, second [j],
40123 &first [i], 2);
40125 n >>= 1;
40126 for (i = j = 0; i < n; i += 2, j++)
40128 third[j] = gen_reg_rtx (gmode);
40129 ix86_expand_vector_init_concat (gmode, third[j],
40130 &second[i], 2);
40132 n >>= 1;
40133 ix86_expand_vector_init_concat (mode, target, third, n);
40135 else if (n > 2)
40137 gcc_assert (hmode != VOIDmode);
40138 for (i = j = 0; i < n; i += 2, j++)
40140 second[j] = gen_reg_rtx (hmode);
40141 ix86_expand_vector_init_concat (hmode, second [j],
40142 &first [i], 2);
40144 n >>= 1;
40145 ix86_expand_vector_init_concat (mode, target, second, n);
40147 else
40148 ix86_expand_vector_init_concat (mode, target, first, n);
40149 break;
40151 default:
40152 gcc_unreachable ();
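/* A sketch of the recursive halving above, assuming V8SImode inputs
   o0..o7 (illustration only):
     first[0..3]  = { o0,o1 }  { o2,o3 }  { o4,o5 }  { o6,o7 }    (V2SImode)
     second[0..1] = concat (first[0], first[1]),
                    concat (first[2], first[3])                   (V4SImode)
     target       = concat (second[0], second[1])                 (V8SImode)
   512-bit modes add one more level using GMODE in the same way.  */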
40156 /* A subroutine of ix86_expand_vector_init_general. Use vector
40157 interleave to handle the most general case: all values variable,
40158 and none identical. */
40160 static void
40161 ix86_expand_vector_init_interleave (enum machine_mode mode,
40162 rtx target, rtx *ops, int n)
40164 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40165 int i, j;
40166 rtx op0, op1;
40167 rtx (*gen_load_even) (rtx, rtx, rtx);
40168 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40169 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40171 switch (mode)
40173 case V8HImode:
40174 gen_load_even = gen_vec_setv8hi;
40175 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40176 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40177 inner_mode = HImode;
40178 first_imode = V4SImode;
40179 second_imode = V2DImode;
40180 third_imode = VOIDmode;
40181 break;
40182 case V16QImode:
40183 gen_load_even = gen_vec_setv16qi;
40184 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40185 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40186 inner_mode = QImode;
40187 first_imode = V8HImode;
40188 second_imode = V4SImode;
40189 third_imode = V2DImode;
40190 break;
40191 default:
40192 gcc_unreachable ();
40195 for (i = 0; i < n; i++)
40197 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40198 op0 = gen_reg_rtx (SImode);
40199 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40201 /* Insert the SImode value as low element of V4SImode vector. */
40202 op1 = gen_reg_rtx (V4SImode);
40203 op0 = gen_rtx_VEC_MERGE (V4SImode,
40204 gen_rtx_VEC_DUPLICATE (V4SImode,
40205 op0),
40206 CONST0_RTX (V4SImode),
40207 const1_rtx);
40208 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40210 /* Cast the V4SImode vector back to a vector in original mode. */
40211 op0 = gen_reg_rtx (mode);
40212 emit_move_insn (op0, gen_lowpart (mode, op1));
40214 /* Load even elements into the second position. */
40215 emit_insn (gen_load_even (op0,
40216 force_reg (inner_mode,
40217 ops [i + i + 1]),
40218 const1_rtx));
40220 /* Cast vector to FIRST_IMODE vector. */
40221 ops[i] = gen_reg_rtx (first_imode);
40222 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40225 /* Interleave low FIRST_IMODE vectors. */
40226 for (i = j = 0; i < n; i += 2, j++)
40228 op0 = gen_reg_rtx (first_imode);
40229 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40231 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40232 ops[j] = gen_reg_rtx (second_imode);
40233 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40236 /* Interleave low SECOND_IMODE vectors. */
40237 switch (second_imode)
40239 case V4SImode:
40240 for (i = j = 0; i < n / 2; i += 2, j++)
40242 op0 = gen_reg_rtx (second_imode);
40243 emit_insn (gen_interleave_second_low (op0, ops[i],
40244 ops[i + 1]));
40246 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40247 vector. */
40248 ops[j] = gen_reg_rtx (third_imode);
40249 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40251 second_imode = V2DImode;
40252 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40253 /* FALLTHRU */
40255 case V2DImode:
40256 op0 = gen_reg_rtx (second_imode);
40257 emit_insn (gen_interleave_second_low (op0, ops[0],
40258 ops[1]));
40260 /* Cast the SECOND_IMODE vector back to a vector in the original
40261 mode. */
40262 emit_insn (gen_rtx_SET (VOIDmode, target,
40263 gen_lowpart (mode, op0)));
40264 break;
40266 default:
40267 gcc_unreachable ();
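/* A sketch of the interleave scheme above for V8HImode inputs h0..h7
   (illustration only).  Each loop iteration builds a vector whose two
   low HImode lanes are { h2i, h2i+1 }; the low-interleave steps then
   merge those low parts:
     ops[0..3] = { h0,h1,.. } { h2,h3,.. } { h4,h5,.. } { h6,h7,.. }
     ops[0..1] = { h0,h1,h2,h3,.. } { h4,h5,h6,h7,.. }   (interleave low V4SI)
     target    = { h0,h1,h2,h3,h4,h5,h6,h7 }             (interleave low V2DI)
   ".." marks don't-care lanes.  */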
40271 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40272 all values variable, and none identical. */
40274 static void
40275 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40276 rtx target, rtx vals)
40278 rtx ops[64], op0, op1;
40279 enum machine_mode half_mode = VOIDmode;
40280 int n, i;
40282 switch (mode)
40284 case V2SFmode:
40285 case V2SImode:
40286 if (!mmx_ok && !TARGET_SSE)
40287 break;
40288 /* FALLTHRU */
40290 case V16SImode:
40291 case V16SFmode:
40292 case V8DFmode:
40293 case V8DImode:
40294 case V8SFmode:
40295 case V8SImode:
40296 case V4DFmode:
40297 case V4DImode:
40298 case V4SFmode:
40299 case V4SImode:
40300 case V2DFmode:
40301 case V2DImode:
40302 n = GET_MODE_NUNITS (mode);
40303 for (i = 0; i < n; i++)
40304 ops[i] = XVECEXP (vals, 0, i);
40305 ix86_expand_vector_init_concat (mode, target, ops, n);
40306 return;
40308 case V32QImode:
40309 half_mode = V16QImode;
40310 goto half;
40312 case V16HImode:
40313 half_mode = V8HImode;
40314 goto half;
40316 half:
40317 n = GET_MODE_NUNITS (mode);
40318 for (i = 0; i < n; i++)
40319 ops[i] = XVECEXP (vals, 0, i);
40320 op0 = gen_reg_rtx (half_mode);
40321 op1 = gen_reg_rtx (half_mode);
40322 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40323 n >> 2);
40324 ix86_expand_vector_init_interleave (half_mode, op1,
40325 &ops [n >> 1], n >> 2);
40326 emit_insn (gen_rtx_SET (VOIDmode, target,
40327 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40328 return;
40330 case V16QImode:
40331 if (!TARGET_SSE4_1)
40332 break;
40333 /* FALLTHRU */
40335 case V8HImode:
40336 if (!TARGET_SSE2)
40337 break;
40339 /* Don't use ix86_expand_vector_init_interleave if we can't
40340 move from GPR to SSE register directly. */
40341 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40342 break;
40344 n = GET_MODE_NUNITS (mode);
40345 for (i = 0; i < n; i++)
40346 ops[i] = XVECEXP (vals, 0, i);
40347 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40348 return;
40350 case V4HImode:
40351 case V8QImode:
40352 break;
40354 default:
40355 gcc_unreachable ();
40359 int i, j, n_elts, n_words, n_elt_per_word;
40360 enum machine_mode inner_mode;
40361 rtx words[4], shift;
40363 inner_mode = GET_MODE_INNER (mode);
40364 n_elts = GET_MODE_NUNITS (mode);
40365 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40366 n_elt_per_word = n_elts / n_words;
40367 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40369 for (i = 0; i < n_words; ++i)
40371 rtx word = NULL_RTX;
40373 for (j = 0; j < n_elt_per_word; ++j)
40375 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40376 elt = convert_modes (word_mode, inner_mode, elt, true);
40378 if (j == 0)
40379 word = elt;
40380 else
40382 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40383 word, 1, OPTAB_LIB_WIDEN);
40384 word = expand_simple_binop (word_mode, IOR, word, elt,
40385 word, 1, OPTAB_LIB_WIDEN);
40389 words[i] = word;
40392 if (n_words == 1)
40393 emit_move_insn (target, gen_lowpart (mode, words[0]));
40394 else if (n_words == 2)
40396 rtx tmp = gen_reg_rtx (mode);
40397 emit_clobber (tmp);
40398 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40399 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40400 emit_move_insn (target, tmp);
40402 else if (n_words == 4)
40404 rtx tmp = gen_reg_rtx (V4SImode);
40405 gcc_assert (word_mode == SImode);
40406 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40407 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40408 emit_move_insn (target, gen_lowpart (mode, tmp));
40410 else
40411 gcc_unreachable ();
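/* A sketch of the word packing above, assuming V4HImode on a 32-bit
   target with elements e0..e3 (illustration only):
     word0 = ((unsigned int) e1 << 16) | (e0 & 0xffff);
     word1 = ((unsigned int) e3 << 16) | (e2 & 0xffff);
   word0 and word1 are then written to the low and high SImode parts of
   the vector temporary.  */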
40415 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40416 instructions unless MMX_OK is true. */
40418 void
40419 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40421 enum machine_mode mode = GET_MODE (target);
40422 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40423 int n_elts = GET_MODE_NUNITS (mode);
40424 int n_var = 0, one_var = -1;
40425 bool all_same = true, all_const_zero = true;
40426 int i;
40427 rtx x;
40429 for (i = 0; i < n_elts; ++i)
40431 x = XVECEXP (vals, 0, i);
40432 if (!(CONST_INT_P (x)
40433 || GET_CODE (x) == CONST_DOUBLE
40434 || GET_CODE (x) == CONST_FIXED))
40435 n_var++, one_var = i;
40436 else if (x != CONST0_RTX (inner_mode))
40437 all_const_zero = false;
40438 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40439 all_same = false;
40442 /* Constants are best loaded from the constant pool. */
40443 if (n_var == 0)
40445 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40446 return;
40449 /* If all values are identical, broadcast the value. */
40450 if (all_same
40451 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40452 XVECEXP (vals, 0, 0)))
40453 return;
40455 /* Values where only one field is non-constant are best loaded from
40456 the pool and overwritten via move later. */
40457 if (n_var == 1)
40459 if (all_const_zero
40460 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40461 XVECEXP (vals, 0, one_var),
40462 one_var))
40463 return;
40465 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40466 return;
40469 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40472 void
40473 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40475 enum machine_mode mode = GET_MODE (target);
40476 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40477 enum machine_mode half_mode;
40478 bool use_vec_merge = false;
40479 rtx tmp;
40480 static rtx (*gen_extract[6][2]) (rtx, rtx)
40482 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40483 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40484 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40485 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40486 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40487 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40489 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40491 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40492 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40493 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40494 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40495 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40496 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40498 int i, j, n;
40500 switch (mode)
40502 case V2SFmode:
40503 case V2SImode:
40504 if (mmx_ok)
40506 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40507 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40508 if (elt == 0)
40509 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40510 else
40511 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40512 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40513 return;
40515 break;
40517 case V2DImode:
40518 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40519 if (use_vec_merge)
40520 break;
40522 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40523 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40524 if (elt == 0)
40525 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40526 else
40527 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40528 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40529 return;
40531 case V2DFmode:
40533 rtx op0, op1;
40535 /* For the two element vectors, we implement a VEC_CONCAT with
40536 the extraction of the other element. */
40538 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40539 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40541 if (elt == 0)
40542 op0 = val, op1 = tmp;
40543 else
40544 op0 = tmp, op1 = val;
40546 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40547 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40549 return;
40551 case V4SFmode:
40552 use_vec_merge = TARGET_SSE4_1;
40553 if (use_vec_merge)
40554 break;
40556 switch (elt)
40558 case 0:
40559 use_vec_merge = true;
40560 break;
40562 case 1:
40563 /* tmp = target = A B C D */
40564 tmp = copy_to_reg (target);
40565 /* target = A A B B */
40566 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40567 /* target = X A B B */
40568 ix86_expand_vector_set (false, target, val, 0);
40569 /* target = A X C D */
40570 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40571 const1_rtx, const0_rtx,
40572 GEN_INT (2+4), GEN_INT (3+4)));
40573 return;
40575 case 2:
40576 /* tmp = target = A B C D */
40577 tmp = copy_to_reg (target);
40578 /* tmp = X B C D */
40579 ix86_expand_vector_set (false, tmp, val, 0);
40580 /* target = A B X D */
40581 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40582 const0_rtx, const1_rtx,
40583 GEN_INT (0+4), GEN_INT (3+4)));
40584 return;
40586 case 3:
40587 /* tmp = target = A B C D */
40588 tmp = copy_to_reg (target);
40589 /* tmp = X B C D */
40590 ix86_expand_vector_set (false, tmp, val, 0);
40591 /* target = A B X D */
40592 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40593 const0_rtx, const1_rtx,
40594 GEN_INT (2+4), GEN_INT (0+4)));
40595 return;
40597 default:
40598 gcc_unreachable ();
40600 break;
40602 case V4SImode:
40603 use_vec_merge = TARGET_SSE4_1;
40604 if (use_vec_merge)
40605 break;
40607 /* Element 0 handled by vec_merge below. */
40608 if (elt == 0)
40610 use_vec_merge = true;
40611 break;
40614 if (TARGET_SSE2)
40616 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40617 store into element 0, then shuffle them back. */
40619 rtx order[4];
40621 order[0] = GEN_INT (elt);
40622 order[1] = const1_rtx;
40623 order[2] = const2_rtx;
40624 order[3] = GEN_INT (3);
40625 order[elt] = const0_rtx;
40627 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40628 order[1], order[2], order[3]));
40630 ix86_expand_vector_set (false, target, val, 0);
40632 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40633 order[1], order[2], order[3]));
40635 else
40637 /* For SSE1, we have to reuse the V4SF code. */
40638 rtx t = gen_reg_rtx (V4SFmode);
40639 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40640 emit_move_insn (target, gen_lowpart (mode, t));
40642 return;
40644 case V8HImode:
40645 use_vec_merge = TARGET_SSE2;
40646 break;
40647 case V4HImode:
40648 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40649 break;
40651 case V16QImode:
40652 use_vec_merge = TARGET_SSE4_1;
40653 break;
40655 case V8QImode:
40656 break;
40658 case V32QImode:
40659 half_mode = V16QImode;
40660 j = 0;
40661 n = 16;
40662 goto half;
40664 case V16HImode:
40665 half_mode = V8HImode;
40666 j = 1;
40667 n = 8;
40668 goto half;
40670 case V8SImode:
40671 half_mode = V4SImode;
40672 j = 2;
40673 n = 4;
40674 goto half;
40676 case V4DImode:
40677 half_mode = V2DImode;
40678 j = 3;
40679 n = 2;
40680 goto half;
40682 case V8SFmode:
40683 half_mode = V4SFmode;
40684 j = 4;
40685 n = 4;
40686 goto half;
40688 case V4DFmode:
40689 half_mode = V2DFmode;
40690 j = 5;
40691 n = 2;
40692 goto half;
40694 half:
40695 /* Compute offset. */
40696 i = elt / n;
40697 elt %= n;
40699 gcc_assert (i <= 1);
40701 /* Extract the half. */
40702 tmp = gen_reg_rtx (half_mode);
40703 emit_insn (gen_extract[j][i] (tmp, target));
40705 /* Put val in tmp at elt. */
40706 ix86_expand_vector_set (false, tmp, val, elt);
40708 /* Put it back. */
40709 emit_insn (gen_insert[j][i] (target, target, tmp));
40710 return;
40712 default:
40713 break;
40716 if (use_vec_merge)
40718 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40719 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40720 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40722 else
40724 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40726 emit_move_insn (mem, target);
40728 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40729 emit_move_insn (tmp, val);
40731 emit_move_insn (target, mem);
40735 void
40736 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40738 enum machine_mode mode = GET_MODE (vec);
40739 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40740 bool use_vec_extr = false;
40741 rtx tmp;
40743 switch (mode)
40745 case V2SImode:
40746 case V2SFmode:
40747 if (!mmx_ok)
40748 break;
40749 /* FALLTHRU */
40751 case V2DFmode:
40752 case V2DImode:
40753 use_vec_extr = true;
40754 break;
40756 case V4SFmode:
40757 use_vec_extr = TARGET_SSE4_1;
40758 if (use_vec_extr)
40759 break;
40761 switch (elt)
40763 case 0:
40764 tmp = vec;
40765 break;
40767 case 1:
40768 case 3:
40769 tmp = gen_reg_rtx (mode);
40770 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40771 GEN_INT (elt), GEN_INT (elt),
40772 GEN_INT (elt+4), GEN_INT (elt+4)));
40773 break;
40775 case 2:
40776 tmp = gen_reg_rtx (mode);
40777 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40778 break;
40780 default:
40781 gcc_unreachable ();
40783 vec = tmp;
40784 use_vec_extr = true;
40785 elt = 0;
40786 break;
40788 case V4SImode:
40789 use_vec_extr = TARGET_SSE4_1;
40790 if (use_vec_extr)
40791 break;
40793 if (TARGET_SSE2)
40795 switch (elt)
40797 case 0:
40798 tmp = vec;
40799 break;
40801 case 1:
40802 case 3:
40803 tmp = gen_reg_rtx (mode);
40804 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40805 GEN_INT (elt), GEN_INT (elt),
40806 GEN_INT (elt), GEN_INT (elt)));
40807 break;
40809 case 2:
40810 tmp = gen_reg_rtx (mode);
40811 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40812 break;
40814 default:
40815 gcc_unreachable ();
40817 vec = tmp;
40818 use_vec_extr = true;
40819 elt = 0;
40821 else
40823 /* For SSE1, we have to reuse the V4SF code. */
40824 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40825 gen_lowpart (V4SFmode, vec), elt);
40826 return;
40828 break;
40830 case V8HImode:
40831 use_vec_extr = TARGET_SSE2;
40832 break;
40833 case V4HImode:
40834 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40835 break;
40837 case V16QImode:
40838 use_vec_extr = TARGET_SSE4_1;
40839 break;
40841 case V8SFmode:
40842 if (TARGET_AVX)
40844 tmp = gen_reg_rtx (V4SFmode);
40845 if (elt < 4)
40846 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40847 else
40848 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40849 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40850 return;
40852 break;
40854 case V4DFmode:
40855 if (TARGET_AVX)
40857 tmp = gen_reg_rtx (V2DFmode);
40858 if (elt < 2)
40859 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40860 else
40861 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40862 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40863 return;
40865 break;
40867 case V32QImode:
40868 if (TARGET_AVX)
40870 tmp = gen_reg_rtx (V16QImode);
40871 if (elt < 16)
40872 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40873 else
40874 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40875 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40876 return;
40878 break;
40880 case V16HImode:
40881 if (TARGET_AVX)
40883 tmp = gen_reg_rtx (V8HImode);
40884 if (elt < 8)
40885 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40886 else
40887 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40888 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40889 return;
40891 break;
40893 case V8SImode:
40894 if (TARGET_AVX)
40896 tmp = gen_reg_rtx (V4SImode);
40897 if (elt < 4)
40898 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40899 else
40900 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40901 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40902 return;
40904 break;
40906 case V4DImode:
40907 if (TARGET_AVX)
40909 tmp = gen_reg_rtx (V2DImode);
40910 if (elt < 2)
40911 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40912 else
40913 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40914 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40915 return;
40917 break;
40919 case V16SFmode:
40920 tmp = gen_reg_rtx (V8SFmode);
40921 if (elt < 8)
40922 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40923 else
40924 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40925 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40926 return;
40928 case V8DFmode:
40929 tmp = gen_reg_rtx (V4DFmode);
40930 if (elt < 4)
40931 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40932 else
40933 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40934 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40935 return;
40937 case V16SImode:
40938 tmp = gen_reg_rtx (V8SImode);
40939 if (elt < 8)
40940 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40941 else
40942 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40943 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40944 return;
40946 case V8DImode:
40947 tmp = gen_reg_rtx (V4DImode);
40948 if (elt < 4)
40949 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40950 else
40951 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40952 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40953 return;
40955 case V8QImode:
40956 /* ??? Could extract the appropriate HImode element and shift. */
40957 default:
40958 break;
40961 if (use_vec_extr)
40963 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40964 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40966 /* Let the rtl optimizers know about the zero extension performed. */
40967 if (inner_mode == QImode || inner_mode == HImode)
40969 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40970 target = gen_lowpart (SImode, target);
40973 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40975 else
40977 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40979 emit_move_insn (mem, vec);
40981 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40982 emit_move_insn (target, tmp);
40986 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40987 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40988 The upper bits of DEST are undefined, though they shouldn't cause
40989 exceptions (some bits from src or all zeros are ok). */
40991 static void
40992 emit_reduc_half (rtx dest, rtx src, int i)
40994 rtx tem, d = dest;
40995 switch (GET_MODE (src))
40997 case V4SFmode:
40998 if (i == 128)
40999 tem = gen_sse_movhlps (dest, src, src);
41000 else
41001 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41002 GEN_INT (1 + 4), GEN_INT (1 + 4));
41003 break;
41004 case V2DFmode:
41005 tem = gen_vec_interleave_highv2df (dest, src, src);
41006 break;
41007 case V16QImode:
41008 case V8HImode:
41009 case V4SImode:
41010 case V2DImode:
41011 d = gen_reg_rtx (V1TImode);
41012 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41013 GEN_INT (i / 2));
41014 break;
41015 case V8SFmode:
41016 if (i == 256)
41017 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41018 else
41019 tem = gen_avx_shufps256 (dest, src, src,
41020 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41021 break;
41022 case V4DFmode:
41023 if (i == 256)
41024 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41025 else
41026 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41027 break;
41028 case V32QImode:
41029 case V16HImode:
41030 case V8SImode:
41031 case V4DImode:
41032 if (i == 256)
41034 if (GET_MODE (dest) != V4DImode)
41035 d = gen_reg_rtx (V4DImode);
41036 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41037 gen_lowpart (V4DImode, src),
41038 const1_rtx);
41040 else
41042 d = gen_reg_rtx (V2TImode);
41043 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41044 GEN_INT (i / 2));
41046 break;
41047 case V16SImode:
41048 case V16SFmode:
41049 case V8DImode:
41050 case V8DFmode:
41051 if (i > 128)
41052 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41053 gen_lowpart (V16SImode, src),
41054 gen_lowpart (V16SImode, src),
41055 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41056 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41057 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41058 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41059 GEN_INT (0xC), GEN_INT (0xD),
41060 GEN_INT (0xE), GEN_INT (0xF),
41061 GEN_INT (0x10), GEN_INT (0x11),
41062 GEN_INT (0x12), GEN_INT (0x13),
41063 GEN_INT (0x14), GEN_INT (0x15),
41064 GEN_INT (0x16), GEN_INT (0x17));
41065 else
41066 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41067 gen_lowpart (V16SImode, src),
41068 GEN_INT (i == 128 ? 0x2 : 0x1),
41069 GEN_INT (0x3),
41070 GEN_INT (0x3),
41071 GEN_INT (0x3),
41072 GEN_INT (i == 128 ? 0x6 : 0x5),
41073 GEN_INT (0x7),
41074 GEN_INT (0x7),
41075 GEN_INT (0x7),
41076 GEN_INT (i == 128 ? 0xA : 0x9),
41077 GEN_INT (0xB),
41078 GEN_INT (0xB),
41079 GEN_INT (0xB),
41080 GEN_INT (i == 128 ? 0xE : 0xD),
41081 GEN_INT (0xF),
41082 GEN_INT (0xF),
41083 GEN_INT (0xF));
41084 break;
41085 default:
41086 gcc_unreachable ();
41088 emit_insn (tem);
41089 if (d != dest)
41090 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41093 /* Expand a vector reduction. FN is the binary pattern to reduce;
41094 DEST is the destination; IN is the input vector. */
41096 void
41097 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41099 rtx half, dst, vec = in;
41100 enum machine_mode mode = GET_MODE (in);
41101 int i;
41103 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
41104 if (TARGET_SSE4_1
41105 && mode == V8HImode
41106 && fn == gen_uminv8hi3)
41108 emit_insn (gen_sse4_1_phminposuw (dest, in));
41109 return;
41112 for (i = GET_MODE_BITSIZE (mode);
41113 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41114 i >>= 1)
41116 half = gen_reg_rtx (mode);
41117 emit_reduc_half (half, vec, i);
41118 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41119 dst = dest;
41120 else
41121 dst = gen_reg_rtx (mode);
41122 emit_insn (fn (dst, half, vec));
41123 vec = dst;
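/* A sketch of the reduction loop above for a V4SImode maximum with
   lanes a0..a3 (illustration only):
     i == 128:  half = { a2, a3, 0, 0 };    t    = smax (half, in);
     i ==  64:  half = { t1, t2, t3, 0 };   dest = smax (half, t);
   afterwards lane 0 of DEST holds max (a0, a1, a2, a3); the remaining
   lanes are don't-care.  */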
41127 /* Target hook for scalar_mode_supported_p. */
41128 static bool
41129 ix86_scalar_mode_supported_p (enum machine_mode mode)
41131 if (DECIMAL_FLOAT_MODE_P (mode))
41132 return default_decimal_float_supported_p ();
41133 else if (mode == TFmode)
41134 return true;
41135 else
41136 return default_scalar_mode_supported_p (mode);
41139 /* Implements target hook vector_mode_supported_p. */
41140 static bool
41141 ix86_vector_mode_supported_p (enum machine_mode mode)
41143 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41144 return true;
41145 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41146 return true;
41147 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41148 return true;
41149 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41150 return true;
41151 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41152 return true;
41153 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41154 return true;
41155 return false;
41158 /* Target hook for c_mode_for_suffix. */
41159 static enum machine_mode
41160 ix86_c_mode_for_suffix (char suffix)
41162 if (suffix == 'q')
41163 return TFmode;
41164 if (suffix == 'w')
41165 return XFmode;
41167 return VOIDmode;
41170 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41172 We do this in the new i386 backend to maintain source compatibility
41173 with the old cc0-based compiler. */
41175 static tree
41176 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41177 tree inputs ATTRIBUTE_UNUSED,
41178 tree clobbers)
41180 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41181 clobbers);
41182 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41183 clobbers);
41184 return clobbers;
41187 /* Implements target vector targetm.asm.encode_section_info. */
41189 static void ATTRIBUTE_UNUSED
41190 ix86_encode_section_info (tree decl, rtx rtl, int first)
41192 default_encode_section_info (decl, rtl, first);
41194 if (ix86_in_large_data_p (decl))
41195 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41198 /* Worker function for REVERSE_CONDITION. */
41200 enum rtx_code
41201 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41203 return (mode != CCFPmode && mode != CCFPUmode
41204 ? reverse_condition (code)
41205 : reverse_condition_maybe_unordered (code));
41208 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41209 to OPERANDS[0]. */
41211 const char *
41212 output_387_reg_move (rtx insn, rtx *operands)
41214 if (REG_P (operands[0]))
41216 if (REG_P (operands[1])
41217 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41219 if (REGNO (operands[0]) == FIRST_STACK_REG)
41220 return output_387_ffreep (operands, 0);
41221 return "fstp\t%y0";
41223 if (STACK_TOP_P (operands[0]))
41224 return "fld%Z1\t%y1";
41225 return "fst\t%y0";
41227 else if (MEM_P (operands[0]))
41229 gcc_assert (REG_P (operands[1]));
41230 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41231 return "fstp%Z0\t%y0";
41232 else
41234 /* There is no non-popping store to memory for XFmode.
41235 So if we need one, follow the store with a load. */
41236 if (GET_MODE (operands[0]) == XFmode)
41237 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41238 else
41239 return "fst%Z0\t%y0";
41242 else
41243 gcc_unreachable();
41246 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41247 FP status register is set. */
41249 void
41250 ix86_emit_fp_unordered_jump (rtx label)
41252 rtx reg = gen_reg_rtx (HImode);
41253 rtx temp;
41255 emit_insn (gen_x86_fnstsw_1 (reg));
41257 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41259 emit_insn (gen_x86_sahf_1 (reg));
41261 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41262 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41264 else
41266 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41268 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41269 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41272 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41273 gen_rtx_LABEL_REF (VOIDmode, label),
41274 pc_rtx);
41275 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41277 emit_jump_insn (temp);
41278 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41281 /* Output code to perform a log1p XFmode calculation. */
41283 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41285 rtx label1 = gen_label_rtx ();
41286 rtx label2 = gen_label_rtx ();
41288 rtx tmp = gen_reg_rtx (XFmode);
41289 rtx tmp2 = gen_reg_rtx (XFmode);
41290 rtx test;
41292 emit_insn (gen_absxf2 (tmp, op1));
41293 test = gen_rtx_GE (VOIDmode, tmp,
41294 CONST_DOUBLE_FROM_REAL_VALUE (
41295 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41296 XFmode));
41297 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41299 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41300 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41301 emit_jump (label2);
41303 emit_label (label1);
41304 emit_move_insn (tmp, CONST1_RTX (XFmode));
41305 emit_insn (gen_addxf3 (tmp, op1, tmp));
41306 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41307 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41309 emit_label (label2);
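/* Rough C equivalent of the sequence above (illustration only; fyl2x
   and fyl2xp1 name the x87 instructions, not C functions):
     if (fabs (x) >= 0.2929)            -- 1 - sqrt (2) / 2
       op0 = fyl2x (ln2, 1.0 + x);      -- ln(2) * log2(1 + x)
     else
       op0 = fyl2xp1 (ln2, x);          -- same value, accurate for small |x|
   so both paths yield ln (1 + x).  */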
41312 /* Emit code for round calculation. */
41313 void ix86_emit_i387_round (rtx op0, rtx op1)
41315 enum machine_mode inmode = GET_MODE (op1);
41316 enum machine_mode outmode = GET_MODE (op0);
41317 rtx e1, e2, res, tmp, tmp1, half;
41318 rtx scratch = gen_reg_rtx (HImode);
41319 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41320 rtx jump_label = gen_label_rtx ();
41321 rtx insn;
41322 rtx (*gen_abs) (rtx, rtx);
41323 rtx (*gen_neg) (rtx, rtx);
41325 switch (inmode)
41327 case SFmode:
41328 gen_abs = gen_abssf2;
41329 break;
41330 case DFmode:
41331 gen_abs = gen_absdf2;
41332 break;
41333 case XFmode:
41334 gen_abs = gen_absxf2;
41335 break;
41336 default:
41337 gcc_unreachable ();
41340 switch (outmode)
41342 case SFmode:
41343 gen_neg = gen_negsf2;
41344 break;
41345 case DFmode:
41346 gen_neg = gen_negdf2;
41347 break;
41348 case XFmode:
41349 gen_neg = gen_negxf2;
41350 break;
41351 case HImode:
41352 gen_neg = gen_neghi2;
41353 break;
41354 case SImode:
41355 gen_neg = gen_negsi2;
41356 break;
41357 case DImode:
41358 gen_neg = gen_negdi2;
41359 break;
41360 default:
41361 gcc_unreachable ();
41364 e1 = gen_reg_rtx (inmode);
41365 e2 = gen_reg_rtx (inmode);
41366 res = gen_reg_rtx (outmode);
41368 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41370 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41372 /* scratch = fxam(op1) */
41373 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41374 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41375 UNSPEC_FXAM)));
41376 /* e1 = fabs(op1) */
41377 emit_insn (gen_abs (e1, op1));
41379 /* e2 = e1 + 0.5 */
41380 half = force_reg (inmode, half);
41381 emit_insn (gen_rtx_SET (VOIDmode, e2,
41382 gen_rtx_PLUS (inmode, e1, half)));
41384 /* res = floor(e2) */
41385 if (inmode != XFmode)
41387 tmp1 = gen_reg_rtx (XFmode);
41389 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41390 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41392 else
41393 tmp1 = e2;
41395 switch (outmode)
41397 case SFmode:
41398 case DFmode:
41400 rtx tmp0 = gen_reg_rtx (XFmode);
41402 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41404 emit_insn (gen_rtx_SET (VOIDmode, res,
41405 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41406 UNSPEC_TRUNC_NOOP)));
41408 break;
41409 case XFmode:
41410 emit_insn (gen_frndintxf2_floor (res, tmp1));
41411 break;
41412 case HImode:
41413 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41414 break;
41415 case SImode:
41416 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41417 break;
41418 case DImode:
41419 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41420 break;
41421 default:
41422 gcc_unreachable ();
41425 /* flags = signbit(a) */
41426 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41428 /* if (flags) then res = -res */
41429 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41430 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41431 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41432 pc_rtx);
41433 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41434 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41435 JUMP_LABEL (insn) = jump_label;
41437 emit_insn (gen_neg (res, res));
41439 emit_label (jump_label);
41440 LABEL_NUSES (jump_label) = 1;
41442 emit_move_insn (op0, res);
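/* Rough C equivalent of the sequence above for a double operand and an
   integral result (illustration only):
     int neg = signbit (a);                 -- read from the fxam result
     double t = floor (fabs (a) + 0.5);
     long res = (long) t;
     return neg ? -res : res;               -- halfway cases round away
                                               from zero  */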
41445 /* Output code to perform a Newton-Raphson approximation of a single precision
41446 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41448 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41450 rtx x0, x1, e0, e1;
41452 x0 = gen_reg_rtx (mode);
41453 e0 = gen_reg_rtx (mode);
41454 e1 = gen_reg_rtx (mode);
41455 x1 = gen_reg_rtx (mode);
41457 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41459 b = force_reg (mode, b);
41461 /* x0 = rcp(b) estimate */
41462 if (mode == V16SFmode || mode == V8DFmode)
41463 emit_insn (gen_rtx_SET (VOIDmode, x0,
41464 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41465 UNSPEC_RCP14)));
41466 else
41467 emit_insn (gen_rtx_SET (VOIDmode, x0,
41468 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41469 UNSPEC_RCP)));
41471 /* e0 = x0 * b */
41472 emit_insn (gen_rtx_SET (VOIDmode, e0,
41473 gen_rtx_MULT (mode, x0, b)));
41475 /* e0 = x0 * e0 */
41476 emit_insn (gen_rtx_SET (VOIDmode, e0,
41477 gen_rtx_MULT (mode, x0, e0)));
41479 /* e1 = x0 + x0 */
41480 emit_insn (gen_rtx_SET (VOIDmode, e1,
41481 gen_rtx_PLUS (mode, x0, x0)));
41483 /* x1 = e1 - e0 */
41484 emit_insn (gen_rtx_SET (VOIDmode, x1,
41485 gen_rtx_MINUS (mode, e1, e0)));
41487 /* res = a * x1 */
41488 emit_insn (gen_rtx_SET (VOIDmode, res,
41489 gen_rtx_MULT (mode, a, x1)));
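/* Scalar sketch of the refinement above (illustration only; rcp_est is
   a hypothetical stand-in for the hardware reciprocal estimate):
     float x0 = rcp_est (b);               -- ~12-bit reciprocal estimate
     float x1 = x0 * (2.0f - b * x0);      -- one Newton-Raphson step
     res = a * x1;                         -- quotient, ~24-bit accuracy
   which is algebraically (rcp(b) + rcp(b)) - b * rcp(b) * rcp(b), as in
   the comment above.  */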
41492 /* Output code to perform a Newton-Raphson approximation of a
41493 single precision floating point [reciprocal] square root. */
41495 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41496 bool recip)
41498 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41499 REAL_VALUE_TYPE r;
41500 int unspec;
41502 x0 = gen_reg_rtx (mode);
41503 e0 = gen_reg_rtx (mode);
41504 e1 = gen_reg_rtx (mode);
41505 e2 = gen_reg_rtx (mode);
41506 e3 = gen_reg_rtx (mode);
41508 real_from_integer (&r, VOIDmode, -3, -1, 0);
41509 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41511 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41512 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41513 unspec = UNSPEC_RSQRT;
41515 if (VECTOR_MODE_P (mode))
41517 mthree = ix86_build_const_vector (mode, true, mthree);
41518 mhalf = ix86_build_const_vector (mode, true, mhalf);
41519 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41520 if (GET_MODE_SIZE (mode) == 64)
41521 unspec = UNSPEC_RSQRT14;
41524 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41525 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41527 a = force_reg (mode, a);
41529 /* x0 = rsqrt(a) estimate */
41530 emit_insn (gen_rtx_SET (VOIDmode, x0,
41531 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41532 unspec)));
41534 /* If a == 0.0, filter out the infinite rsqrt estimate to avoid a NaN for sqrt(0.0). */
41535 if (!recip)
41537 rtx zero, mask;
41539 zero = gen_reg_rtx (mode);
41540 mask = gen_reg_rtx (mode);
41542 zero = force_reg (mode, CONST0_RTX(mode));
41544 /* Handle masked compare. */
41545 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41547 mask = gen_reg_rtx (HImode);
41548 /* Imm value 0x4 corresponds to not-equal comparison. */
41549 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41550 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41552 else
41554 emit_insn (gen_rtx_SET (VOIDmode, mask,
41555 gen_rtx_NE (mode, zero, a)));
41557 emit_insn (gen_rtx_SET (VOIDmode, x0,
41558 gen_rtx_AND (mode, x0, mask)));
41562 /* e0 = x0 * a */
41563 emit_insn (gen_rtx_SET (VOIDmode, e0,
41564 gen_rtx_MULT (mode, x0, a)));
41565 /* e1 = e0 * x0 */
41566 emit_insn (gen_rtx_SET (VOIDmode, e1,
41567 gen_rtx_MULT (mode, e0, x0)));
41569 /* e2 = e1 - 3. */
41570 mthree = force_reg (mode, mthree);
41571 emit_insn (gen_rtx_SET (VOIDmode, e2,
41572 gen_rtx_PLUS (mode, e1, mthree)));
41574 mhalf = force_reg (mode, mhalf);
41575 if (recip)
41576 /* e3 = -.5 * x0 */
41577 emit_insn (gen_rtx_SET (VOIDmode, e3,
41578 gen_rtx_MULT (mode, x0, mhalf)));
41579 else
41580 /* e3 = -.5 * e0 */
41581 emit_insn (gen_rtx_SET (VOIDmode, e3,
41582 gen_rtx_MULT (mode, e0, mhalf)));
41583 /* ret = e2 * e3 */
41584 emit_insn (gen_rtx_SET (VOIDmode, res,
41585 gen_rtx_MULT (mode, e2, e3)));
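/* Scalar sketch of the refinement above (illustration only; rsqrt_est
   is a hypothetical stand-in for the hardware estimate):
     float x0 = rsqrt_est (a);                      -- ~12-bit 1/sqrt estimate
     float x1 = x0 * (1.5f - 0.5f * a * x0 * x0);   -- one Newton-Raphson step
     res = recip ? x1 : a * x1;                     -- rsqrt (a) or sqrt (a)
   This is the -0.5 * (a * x0 * x0 - 3.0) form from the comment above,
   factored differently.  */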
41588 #ifdef TARGET_SOLARIS
41589 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41591 static void
41592 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41593 tree decl)
41595 /* With Binutils 2.15, the "@unwind" marker must be specified on
41596 every occurrence of the ".eh_frame" section, not just the first
41597 one. */
41598 if (TARGET_64BIT
41599 && strcmp (name, ".eh_frame") == 0)
41601 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41602 flags & SECTION_WRITE ? "aw" : "a");
41603 return;
41606 #ifndef USE_GAS
41607 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41609 solaris_elf_asm_comdat_section (name, flags, decl);
41610 return;
41612 #endif
41614 default_elf_asm_named_section (name, flags, decl);
41616 #endif /* TARGET_SOLARIS */
41618 /* Return the mangling of TYPE if it is an extended fundamental type. */
41620 static const char *
41621 ix86_mangle_type (const_tree type)
41623 type = TYPE_MAIN_VARIANT (type);
41625 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41626 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41627 return NULL;
41629 switch (TYPE_MODE (type))
41631 case TFmode:
41632 /* __float128 is "g". */
41633 return "g";
41634 case XFmode:
41635 /* "long double" or __float80 is "e". */
41636 return "e";
41637 default:
41638 return NULL;
41642 /* For 32-bit code we can save PIC register setup by using
41643 __stack_chk_fail_local hidden function instead of calling
41644 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41645 register, so it is better to call __stack_chk_fail directly. */
41647 static tree ATTRIBUTE_UNUSED
41648 ix86_stack_protect_fail (void)
41650 return TARGET_64BIT
41651 ? default_external_stack_protect_fail ()
41652 : default_hidden_stack_protect_fail ();
41655 /* Select a format to encode pointers in exception handling data. CODE
41656 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41657 true if the symbol may be affected by dynamic relocations.
41659 ??? All x86 object file formats are capable of representing this.
41660 After all, the relocation needed is the same as for the call insn.
41661 Whether or not a particular assembler allows us to enter such, I
41662 guess we'll have to see. */
41663 int
41664 asm_preferred_eh_data_format (int code, int global)
41666 if (flag_pic)
41668 int type = DW_EH_PE_sdata8;
41669 if (!TARGET_64BIT
41670 || ix86_cmodel == CM_SMALL_PIC
41671 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41672 type = DW_EH_PE_sdata4;
41673 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41675 if (ix86_cmodel == CM_SMALL
41676 || (ix86_cmodel == CM_MEDIUM && code))
41677 return DW_EH_PE_udata4;
41678 return DW_EH_PE_absptr;
41681 /* Expand copysign from SIGN to the positive value ABS_VALUE
41682 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41683 the sign-bit. */
41684 static void
41685 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41687 enum machine_mode mode = GET_MODE (sign);
41688 rtx sgn = gen_reg_rtx (mode);
41689 if (mask == NULL_RTX)
41691 enum machine_mode vmode;
41693 if (mode == SFmode)
41694 vmode = V4SFmode;
41695 else if (mode == DFmode)
41696 vmode = V2DFmode;
41697 else
41698 vmode = mode;
41700 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41701 if (!VECTOR_MODE_P (mode))
41703 /* We need to generate a scalar mode mask in this case. */
41704 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41705 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41706 mask = gen_reg_rtx (mode);
41707 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41710 else
41711 mask = gen_rtx_NOT (mode, mask);
41712 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41713 gen_rtx_AND (mode, mask, sign)));
41714 emit_insn (gen_rtx_SET (VOIDmode, result,
41715 gen_rtx_IOR (mode, abs_value, sgn)));
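/* Bit-level view of the sequence above for SFmode (illustration only;
   bits () stands for reinterpreting the float as an integer):
     sgn    = bits (sign) & 0x80000000u;        -- isolate the sign bit
     result = bits (abs_value) | sgn;           -- transplant it
   DFmode uses the 0x8000000000000000 mask; vector modes apply the same
   mask lane-wise.  */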
41718 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41719 mask for masking out the sign-bit is stored in *SMASK, if that is
41720 non-null. */
41721 static rtx
41722 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41724 enum machine_mode vmode, mode = GET_MODE (op0);
41725 rtx xa, mask;
41727 xa = gen_reg_rtx (mode);
41728 if (mode == SFmode)
41729 vmode = V4SFmode;
41730 else if (mode == DFmode)
41731 vmode = V2DFmode;
41732 else
41733 vmode = mode;
41734 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41735 if (!VECTOR_MODE_P (mode))
41737 /* We need to generate a scalar mode mask in this case. */
41738 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41739 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41740 mask = gen_reg_rtx (mode);
41741 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41743 emit_insn (gen_rtx_SET (VOIDmode, xa,
41744 gen_rtx_AND (mode, op0, mask)));
41746 if (smask)
41747 *smask = mask;
41749 return xa;
41752 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41753 swapping the operands if SWAP_OPERANDS is true. The expanded
41754 code is a forward jump to a newly created label in case the
41755 comparison is true. The generated label rtx is returned. */
41756 static rtx
41757 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41758 bool swap_operands)
41760 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41761 rtx label, tmp;
41763 if (swap_operands)
41765 tmp = op0;
41766 op0 = op1;
41767 op1 = tmp;
41770 label = gen_label_rtx ();
41771 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41772 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41773 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41774 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41775 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41776 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41777 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41778 JUMP_LABEL (tmp) = label;
41780 return label;
41783 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41784 using comparison code CODE. Operands are swapped for the comparison if
41785 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41786 static rtx
41787 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41788 bool swap_operands)
41790 rtx (*insn)(rtx, rtx, rtx, rtx);
41791 enum machine_mode mode = GET_MODE (op0);
41792 rtx mask = gen_reg_rtx (mode);
41794 if (swap_operands)
41796 rtx tmp = op0;
41797 op0 = op1;
41798 op1 = tmp;
41801 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41803 emit_insn (insn (mask, op0, op1,
41804 gen_rtx_fmt_ee (code, mode, op0, op1)));
41805 return mask;
41808 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41809 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41810 static rtx
41811 ix86_gen_TWO52 (enum machine_mode mode)
41813 REAL_VALUE_TYPE TWO52r;
41814 rtx TWO52;
41816 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41817 TWO52 = const_double_from_real_value (TWO52r, mode);
41818 TWO52 = force_reg (mode, TWO52);
41820 return TWO52;
41823 /* Expand SSE sequence for computing lround from OP1 storing
41824 into OP0. */
41825 void
41826 ix86_expand_lround (rtx op0, rtx op1)
41828 /* C code for the stuff we're doing below:
41829 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41830 return (long)tmp;
41832 enum machine_mode mode = GET_MODE (op1);
41833 const struct real_format *fmt;
41834 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41835 rtx adj;
41837 /* load nextafter (0.5, 0.0) */
41838 fmt = REAL_MODE_FORMAT (mode);
41839 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41840 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41842 /* adj = copysign (0.5, op1) */
41843 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41844 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41846 /* adj = op1 + adj */
41847 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41849 /* op0 = (imode)adj */
41850 expand_fix (op0, adj, 0);
41853 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41854 into OPERAND0. */
41855 void
41856 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41858 /* C code for the stuff we're doing below (for do_floor):
41859 xi = (long)op1;
41860 xi -= (double)xi > op1 ? 1 : 0;
41861 return xi;
41863 enum machine_mode fmode = GET_MODE (op1);
41864 enum machine_mode imode = GET_MODE (op0);
41865 rtx ireg, freg, label, tmp;
41867 /* reg = (long)op1 */
41868 ireg = gen_reg_rtx (imode);
41869 expand_fix (ireg, op1, 0);
41871 /* freg = (double)reg */
41872 freg = gen_reg_rtx (fmode);
41873 expand_float (freg, ireg, 0);
41875 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41876 label = ix86_expand_sse_compare_and_jump (UNLE,
41877 freg, op1, !do_floor);
41878 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41879 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41880 emit_move_insn (ireg, tmp);
41882 emit_label (label);
41883 LABEL_NUSES (label) = 1;
41885 emit_move_insn (op0, ireg);
41888 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41889 result in OPERAND0. */
41890 void
41891 ix86_expand_rint (rtx operand0, rtx operand1)
41893 /* C code for the stuff we're doing below:
41894 xa = fabs (operand1);
41895 if (!isless (xa, 2**52))
41896 return operand1;
41897 xa = xa + 2**52 - 2**52;
41898 return copysign (xa, operand1);
41900 enum machine_mode mode = GET_MODE (operand0);
41901 rtx res, xa, label, TWO52, mask;
41903 res = gen_reg_rtx (mode);
41904 emit_move_insn (res, operand1);
41906 /* xa = abs (operand1) */
41907 xa = ix86_expand_sse_fabs (res, &mask);
41909 /* if (!isless (xa, TWO52)) goto label; */
41910 TWO52 = ix86_gen_TWO52 (mode);
41911 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41913 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41914 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41916 ix86_sse_copysign_to_positive (res, xa, res, mask);
41918 emit_label (label);
41919 LABEL_NUSES (label) = 1;
41921 emit_move_insn (operand0, res);
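/* The "+ TWO52 - TWO52" trick above relies on the 52-bit double
   mantissa: for xa >= 2**52 every representable value is already an
   integer, and for xa < 2**52 adding 2**52 pushes the fraction bits out
   of the mantissa, so the addition rounds to an integer in the current
   rounding mode.  Example (round-to-nearest-even):
     xa = 3.7;   xa + 2**52 == 2**52 + 4.0;   (xa + 2**52) - 2**52 == 4.0
   which is exactly rint (3.7).  */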
41924 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41925 into OPERAND0. */
41926 void
41927 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41929 /* C code for the stuff we expand below.
41930 double xa = fabs (x), x2;
41931 if (!isless (xa, TWO52))
41932 return x;
41933 xa = xa + TWO52 - TWO52;
41934 x2 = copysign (xa, x);
41935 Compensate. Floor:
41936 if (x2 > x)
41937 x2 -= 1;
41938 Compensate. Ceil:
41939 if (x2 < x)
41940 x2 -= -1;
41941 return x2;
41943 enum machine_mode mode = GET_MODE (operand0);
41944 rtx xa, TWO52, tmp, label, one, res, mask;
41946 TWO52 = ix86_gen_TWO52 (mode);
41948 /* Temporary for holding the result, initialized to the input
41949 operand to ease control flow. */
41950 res = gen_reg_rtx (mode);
41951 emit_move_insn (res, operand1);
41953 /* xa = abs (operand1) */
41954 xa = ix86_expand_sse_fabs (res, &mask);
41956 /* if (!isless (xa, TWO52)) goto label; */
41957 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41959 /* xa = xa + TWO52 - TWO52; */
41960 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41961 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41963 /* xa = copysign (xa, operand1) */
41964 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41966 /* generate 1.0 or -1.0 */
41967 one = force_reg (mode,
41968 const_double_from_real_value (do_floor
41969 ? dconst1 : dconstm1, mode));
41971 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41972 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41973 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41974 gen_rtx_AND (mode, one, tmp)));
41975 /* We always need to subtract here to preserve signed zero. */
41976 tmp = expand_simple_binop (mode, MINUS,
41977 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41978 emit_move_insn (res, tmp);
41980 emit_label (label);
41981 LABEL_NUSES (label) = 1;
41983 emit_move_insn (operand0, res);
41986 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41987 into OPERAND0. */
41988 void
41989 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41991 /* C code for the stuff we expand below.
41992 double xa = fabs (x), x2;
41993 if (!isless (xa, TWO52))
41994 return x;
41995 x2 = (double)(long)x;
41996 Compensate. Floor:
41997 if (x2 > x)
41998 x2 -= 1;
41999 Compensate. Ceil:
42000 if (x2 < x)
42001 x2 += 1;
42002 if (HONOR_SIGNED_ZEROS (mode))
42003 return copysign (x2, x);
42004 return x2;
42006 enum machine_mode mode = GET_MODE (operand0);
42007 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42009 TWO52 = ix86_gen_TWO52 (mode);
42011 /* Temporary for holding the result, initialized to the input
42012 operand to ease control flow. */
42013 res = gen_reg_rtx (mode);
42014 emit_move_insn (res, operand1);
42016 /* xa = abs (operand1) */
42017 xa = ix86_expand_sse_fabs (res, &mask);
42019 /* if (!isless (xa, TWO52)) goto label; */
42020 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42022 /* xa = (double)(long)x */
42023 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42024 expand_fix (xi, res, 0);
42025 expand_float (xa, xi, 0);
42027 /* generate 1.0 */
42028 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42030 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42031 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42032 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42033 gen_rtx_AND (mode, one, tmp)));
42034 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42035 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42036 emit_move_insn (res, tmp);
42038 if (HONOR_SIGNED_ZEROS (mode))
42039 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42041 emit_label (label);
42042 LABEL_NUSES (label) = 1;
42044 emit_move_insn (operand0, res);
42047 /* Expand SSE sequence for computing round from OPERAND1 storing
42048 into OPERAND0. Sequence that works without relying on DImode truncation
42049 via cvttsd2siq that is only available on 64bit targets. */
42050 void
42051 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42053 /* C code for the stuff we expand below.
42054 double xa = fabs (x), xa2, x2;
42055 if (!isless (xa, TWO52))
42056 return x;
42057 Using the absolute value and copying back sign makes
42058 -0.0 -> -0.0 correct.
42059 xa2 = xa + TWO52 - TWO52;
42060 Compensate.
42061 dxa = xa2 - xa;
42062 if (dxa <= -0.5)
42063 xa2 += 1;
42064 else if (dxa > 0.5)
42065 xa2 -= 1;
42066 x2 = copysign (xa2, x);
42067 return x2;
42069 enum machine_mode mode = GET_MODE (operand0);
42070 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42072 TWO52 = ix86_gen_TWO52 (mode);
42074 /* Temporary for holding the result, initialized to the input
42075 operand to ease control flow. */
42076 res = gen_reg_rtx (mode);
42077 emit_move_insn (res, operand1);
42079 /* xa = abs (operand1) */
42080 xa = ix86_expand_sse_fabs (res, &mask);
42082 /* if (!isless (xa, TWO52)) goto label; */
42083 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42085 /* xa2 = xa + TWO52 - TWO52; */
42086 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42087 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42089 /* dxa = xa2 - xa; */
42090 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42092 /* generate 0.5, 1.0 and -0.5 */
42093 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42094 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42095 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42096 0, OPTAB_DIRECT);
42098 /* Compensate. */
42099 tmp = gen_reg_rtx (mode);
42100 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42101 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42102 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42103 gen_rtx_AND (mode, one, tmp)));
42104 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42105 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42106 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42107 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42108 gen_rtx_AND (mode, one, tmp)));
42109 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42111 /* res = copysign (xa2, operand1) */
42112 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42114 emit_label (label);
42115 LABEL_NUSES (label) = 1;
42117 emit_move_insn (operand0, res);
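/* For illustration, the scalar computation behind the sequence above
   (a sketch only, assuming binary64 "double" and the default
   round-to-nearest-even mode for the additions; the helper name is
   made up).  */
#if 0
#include <math.h>

static double
sketch_round_df_32 (double x)
{
  double xa = fabs (x);
  if (!(xa < 0x1p52))                  /* NaN, Inf or already integral.  */
    return x;
  double xa2 = xa + 0x1p52 - 0x1p52;   /* nearest integer to xa */
  double dxa = xa2 - xa;               /* how far the rounding moved it */
  if (dxa <= -0.5)                     /* went down by half or more: round up */
    xa2 += 1.0;
  else if (dxa > 0.5)                  /* went up by more than half: round down */
    xa2 -= 1.0;
  return copysign (xa2, x);            /* restore the sign, including -0.0 */
}
#endif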
42120 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42121 into OPERAND0. */
42122 void
42123 ix86_expand_trunc (rtx operand0, rtx operand1)
42125 /* C code for the SSE variant we expand below.
42126 double xa = fabs (x), x2;
42127 if (!isless (xa, TWO52))
42128 return x;
42129 x2 = (double)(long)x;
42130 if (HONOR_SIGNED_ZEROS (mode))
42131 return copysign (x2, x);
42132 return x2;
42134 enum machine_mode mode = GET_MODE (operand0);
42135 rtx xa, xi, TWO52, label, res, mask;
42137 TWO52 = ix86_gen_TWO52 (mode);
42139 /* Temporary for holding the result, initialized to the input
42140 operand to ease control flow. */
42141 res = gen_reg_rtx (mode);
42142 emit_move_insn (res, operand1);
42144 /* xa = abs (operand1) */
42145 xa = ix86_expand_sse_fabs (res, &mask);
42147 /* if (!isless (xa, TWO52)) goto label; */
42148 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42150 /* x = (double)(long)x */
42151 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42152 expand_fix (xi, res, 0);
42153 expand_float (res, xi, 0);
42155 if (HONOR_SIGNED_ZEROS (mode))
42156 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42158 emit_label (label);
42159 LABEL_NUSES (label) = 1;
42161 emit_move_insn (operand0, res);
42164 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42165 into OPERAND0. */
42166 void
42167 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42169 enum machine_mode mode = GET_MODE (operand0);
42170 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42172 /* C code for the SSE variant we expand below.
42173 double xa = fabs (x), xa2, x2;
42174 if (!isless (xa, TWO52))
42175 return x;
42176 xa2 = xa + TWO52 - TWO52;
42177 Compensate:
42178 if (xa2 > xa)
42179 xa2 -= 1.0;
42180 x2 = copysign (xa2, x);
42181 return x2;
42184 TWO52 = ix86_gen_TWO52 (mode);
42186 /* Temporary for holding the result, initialized to the input
42187 operand to ease control flow. */
42188 res = gen_reg_rtx (mode);
42189 emit_move_insn (res, operand1);
42191 /* xa = abs (operand1) */
42192 xa = ix86_expand_sse_fabs (res, &smask);
42194 /* if (!isless (xa, TWO52)) goto label; */
42195 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42197 /* res = xa + TWO52 - TWO52; */
42198 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42199 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42200 emit_move_insn (res, tmp);
42202 /* generate 1.0 */
42203 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42205 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42206 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42207 emit_insn (gen_rtx_SET (VOIDmode, mask,
42208 gen_rtx_AND (mode, mask, one)));
42209 tmp = expand_simple_binop (mode, MINUS,
42210 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42211 emit_move_insn (res, tmp);
42213 /* res = copysign (res, operand1) */
42214 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42216 emit_label (label);
42217 LABEL_NUSES (label) = 1;
42219 emit_move_insn (operand0, res);
42222 /* Expand SSE sequence for computing round from OPERAND1 storing
42223 into OPERAND0. */
42224 void
42225 ix86_expand_round (rtx operand0, rtx operand1)
42227 /* C code for the stuff we're doing below:
42228 double xa = fabs (x);
42229 if (!isless (xa, TWO52))
42230 return x;
42231 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42232 return copysign (xa, x);
42234 enum machine_mode mode = GET_MODE (operand0);
42235 rtx res, TWO52, xa, label, xi, half, mask;
42236 const struct real_format *fmt;
42237 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42239 /* Temporary for holding the result, initialized to the input
42240 operand to ease control flow. */
42241 res = gen_reg_rtx (mode);
42242 emit_move_insn (res, operand1);
42244 TWO52 = ix86_gen_TWO52 (mode);
42245 xa = ix86_expand_sse_fabs (res, &mask);
42246 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42248 /* load nextafter (0.5, 0.0) */
42249 fmt = REAL_MODE_FORMAT (mode);
42250 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42251 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42253 /* xa = xa + 0.5 */
42254 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42255 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42257 /* xa = (double)(int64_t)xa */
42258 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42259 expand_fix (xi, xa, 0);
42260 expand_float (xa, xi, 0);
42262 /* res = copysign (xa, operand1) */
42263 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42265 emit_label (label);
42266 LABEL_NUSES (label) = 1;
42268 emit_move_insn (operand0, res);
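/* For illustration, the computation above in scalar form.  Adding an
   exact 0.5 before truncating can carry x + 0.5 up past a halfway
   point (the addition itself rounds), so nextafter (0.5, 0.0), i.e.
   0.5 - 2^-(p+1) for a p-bit significand, is added instead.  A sketch
   only; the helper name is made up.  */
#if 0
#include <math.h>

static double
sketch_round_df (double x)
{
  double xa = fabs (x);
  if (!(xa < 0x1p52))                       /* NaN, Inf or already integral.  */
    return x;
  double pred_half = nextafter (0.5, 0.0);  /* 0.5 - 2^-54 for double */
  xa = (double) (long long) (xa + pred_half);  /* truncate after biasing */
  return copysign (xa, x);
}
#endif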
42271 /* Expand SSE sequence for computing round
42272 from OP1 storing into OP0 using sse4 round insn. */
42273 void
42274 ix86_expand_round_sse4 (rtx op0, rtx op1)
42276 enum machine_mode mode = GET_MODE (op0);
42277 rtx e1, e2, res, half;
42278 const struct real_format *fmt;
42279 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42280 rtx (*gen_copysign) (rtx, rtx, rtx);
42281 rtx (*gen_round) (rtx, rtx, rtx);
42283 switch (mode)
42285 case SFmode:
42286 gen_copysign = gen_copysignsf3;
42287 gen_round = gen_sse4_1_roundsf2;
42288 break;
42289 case DFmode:
42290 gen_copysign = gen_copysigndf3;
42291 gen_round = gen_sse4_1_rounddf2;
42292 break;
42293 default:
42294 gcc_unreachable ();
42297 /* round (a) = trunc (a + copysign (0.5, a)) */
42299 /* load nextafter (0.5, 0.0) */
42300 fmt = REAL_MODE_FORMAT (mode);
42301 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42302 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42303 half = const_double_from_real_value (pred_half, mode);
42305 /* e1 = copysign (0.5, op1) */
42306 e1 = gen_reg_rtx (mode);
42307 emit_insn (gen_copysign (e1, half, op1));
42309 /* e2 = op1 + e1 */
42310 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42312 /* res = trunc (e2) */
42313 res = gen_reg_rtx (mode);
42314 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42316 emit_move_insn (op0, res);
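/* The same idea at the intrinsic level, for reference (a sketch only,
   not the expander's literal output; needs SSE4.1, e.g. -msse4.1, and
   the helper name is made up).  */
#if 0
#include <math.h>
#include <smmintrin.h>

static double
sketch_round_sse4 (double x)
{
  /* e1 = copysign (nextafter (0.5, 0.0), x); e2 = x + e1.  */
  double e1 = copysign (nextafter (0.5, 0.0), x);
  __m128d e2 = _mm_set_sd (x + e1);
  /* res = trunc (e2), via roundsd with the truncating rounding control.  */
  __m128d res = _mm_round_sd (e2, e2, _MM_FROUND_TO_ZERO);
  return _mm_cvtsd_f64 (res);
}
#endif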
42320 /* Table of valid machine attributes. */
42321 static const struct attribute_spec ix86_attribute_table[] =
42323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42324 affects_type_identity } */
42325 /* Stdcall attribute says callee is responsible for popping arguments
42326 if they are not variable. */
42327 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42328 true },
42329 /* Fastcall attribute says callee is responsible for popping arguments
42330 if they are not variable. */
42331 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42332 true },
42333 /* Thiscall attribute says callee is responsible for popping arguments
42334 if they are not variable. */
42335 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42336 true },
42337 /* Cdecl attribute says the callee is a normal C declaration */
42338 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42339 true },
42340 /* Regparm attribute specifies how many integer arguments are to be
42341 passed in registers. */
42342 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42343 true },
42344 /* Sseregparm attribute says we are using x86_64 calling conventions
42345 for FP arguments. */
42346 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42347 true },
42348 /* The transactional memory builtins are implicitly regparm or fastcall
42349 depending on the ABI. Override the generic do-nothing attribute that
42350 these builtins were declared with. */
42351 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42352 true },
42353 /* force_align_arg_pointer says this function realigns the stack at entry. */
42354 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42355 false, true, true, ix86_handle_cconv_attribute, false },
42356 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42359 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42360 false },
42361 #endif
42362 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42363 false },
42364 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42365 false },
42366 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42367 SUBTARGET_ATTRIBUTE_TABLE,
42368 #endif
42369 /* ms_abi and sysv_abi calling convention function attributes. */
42370 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42371 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42372 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42373 false },
42374 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42375 ix86_handle_callee_pop_aggregate_return, true },
42376 /* End element. */
42377 { NULL, 0, 0, false, false, false, NULL, false }
42380 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42381 static int
42382 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42383 tree vectype,
42384 int misalign ATTRIBUTE_UNUSED)
42386 unsigned elements;
42388 switch (type_of_cost)
42390 case scalar_stmt:
42391 return ix86_cost->scalar_stmt_cost;
42393 case scalar_load:
42394 return ix86_cost->scalar_load_cost;
42396 case scalar_store:
42397 return ix86_cost->scalar_store_cost;
42399 case vector_stmt:
42400 return ix86_cost->vec_stmt_cost;
42402 case vector_load:
42403 return ix86_cost->vec_align_load_cost;
42405 case vector_store:
42406 return ix86_cost->vec_store_cost;
42408 case vec_to_scalar:
42409 return ix86_cost->vec_to_scalar_cost;
42411 case scalar_to_vec:
42412 return ix86_cost->scalar_to_vec_cost;
42414 case unaligned_load:
42415 case unaligned_store:
42416 return ix86_cost->vec_unalign_load_cost;
42418 case cond_branch_taken:
42419 return ix86_cost->cond_taken_branch_cost;
42421 case cond_branch_not_taken:
42422 return ix86_cost->cond_not_taken_branch_cost;
42424 case vec_perm:
42425 case vec_promote_demote:
42426 return ix86_cost->vec_stmt_cost;
42428 case vec_construct:
42429 elements = TYPE_VECTOR_SUBPARTS (vectype);
42430 return elements / 2 + 1;
42432 default:
42433 gcc_unreachable ();
42437 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42438 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42439 insn every time. */
42441 static GTY(()) rtx vselect_insn;
42443 /* Initialize vselect_insn. */
42445 static void
42446 init_vselect_insn (void)
42448 unsigned i;
42449 rtx x;
42451 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42452 for (i = 0; i < MAX_VECT_LEN; ++i)
42453 XVECEXP (x, 0, i) = const0_rtx;
42454 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42455 const0_rtx), x);
42456 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42457 start_sequence ();
42458 vselect_insn = emit_insn (x);
42459 end_sequence ();
42462 /* Construct (set target (vec_select op0 (parallel perm))) and
42463 return true if that's a valid instruction in the active ISA. */
42465 static bool
42466 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42467 unsigned nelt, bool testing_p)
42469 unsigned int i;
42470 rtx x, save_vconcat;
42471 int icode;
42473 if (vselect_insn == NULL_RTX)
42474 init_vselect_insn ();
42476 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42477 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42478 for (i = 0; i < nelt; ++i)
42479 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42480 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42481 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42482 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42483 SET_DEST (PATTERN (vselect_insn)) = target;
42484 icode = recog_memoized (vselect_insn);
42486 if (icode >= 0 && !testing_p)
42487 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42489 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42490 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42491 INSN_CODE (vselect_insn) = -1;
42493 return icode >= 0;
42496 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42498 static bool
42499 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42500 const unsigned char *perm, unsigned nelt,
42501 bool testing_p)
42503 enum machine_mode v2mode;
42504 rtx x;
42505 bool ok;
42507 if (vselect_insn == NULL_RTX)
42508 init_vselect_insn ();
42510 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42511 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42512 PUT_MODE (x, v2mode);
42513 XEXP (x, 0) = op0;
42514 XEXP (x, 1) = op1;
42515 ok = expand_vselect (target, x, perm, nelt, testing_p);
42516 XEXP (x, 0) = const0_rtx;
42517 XEXP (x, 1) = const0_rtx;
42518 return ok;
42521 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42522 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42524 static bool
42525 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42527 enum machine_mode vmode = d->vmode;
42528 unsigned i, mask, nelt = d->nelt;
42529 rtx target, op0, op1, x;
42530 rtx rperm[32], vperm;
42532 if (d->one_operand_p)
42533 return false;
42534 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42536 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42538 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42540 else
42541 return false;
42543 /* This is a blend, not a permute. Elements must stay in their
42544 respective lanes. */
42545 for (i = 0; i < nelt; ++i)
42547 unsigned e = d->perm[i];
42548 if (!(e == i || e == i + nelt))
42549 return false;
42552 if (d->testing_p)
42553 return true;
42555 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42556 decision should be extracted elsewhere, so that we only try that
42557 sequence once all budget==3 options have been tried. */
42558 target = d->target;
42559 op0 = d->op0;
42560 op1 = d->op1;
42561 mask = 0;
42563 switch (vmode)
42565 case V4DFmode:
42566 case V8SFmode:
42567 case V2DFmode:
42568 case V4SFmode:
42569 case V8HImode:
42570 case V8SImode:
42571 for (i = 0; i < nelt; ++i)
42572 mask |= (d->perm[i] >= nelt) << i;
42573 break;
42575 case V2DImode:
42576 for (i = 0; i < 2; ++i)
42577 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42578 vmode = V8HImode;
42579 goto do_subreg;
42581 case V4SImode:
42582 for (i = 0; i < 4; ++i)
42583 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42584 vmode = V8HImode;
42585 goto do_subreg;
42587 case V16QImode:
42588 /* See if bytes move in pairs so we can use pblendw with
42589 an immediate argument, rather than pblendvb with a vector
42590 argument. */
42591 for (i = 0; i < 16; i += 2)
42592 if (d->perm[i] + 1 != d->perm[i + 1])
42594 use_pblendvb:
42595 for (i = 0; i < nelt; ++i)
42596 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42598 finish_pblendvb:
42599 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42600 vperm = force_reg (vmode, vperm);
42602 if (GET_MODE_SIZE (vmode) == 16)
42603 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42604 else
42605 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42606 if (target != d->target)
42607 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42608 return true;
42611 for (i = 0; i < 8; ++i)
42612 mask |= (d->perm[i * 2] >= 16) << i;
42613 vmode = V8HImode;
42614 /* FALLTHRU */
42616 do_subreg:
42617 target = gen_reg_rtx (vmode);
42618 op0 = gen_lowpart (vmode, op0);
42619 op1 = gen_lowpart (vmode, op1);
42620 break;
42622 case V32QImode:
42623 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42624 for (i = 0; i < 32; i += 2)
42625 if (d->perm[i] + 1 != d->perm[i + 1])
42626 goto use_pblendvb;
42627 /* See if bytes move in quadruplets. If yes, vpblendd
42628 with immediate can be used. */
42629 for (i = 0; i < 32; i += 4)
42630 if (d->perm[i] + 2 != d->perm[i + 2])
42631 break;
42632 if (i < 32)
42634 /* See if bytes move the same in both lanes. If yes,
42635 vpblendw with immediate can be used. */
42636 for (i = 0; i < 16; i += 2)
42637 if (d->perm[i] + 16 != d->perm[i + 16])
42638 goto use_pblendvb;
42640 /* Use vpblendw. */
42641 for (i = 0; i < 16; ++i)
42642 mask |= (d->perm[i * 2] >= 32) << i;
42643 vmode = V16HImode;
42644 goto do_subreg;
42647 /* Use vpblendd. */
42648 for (i = 0; i < 8; ++i)
42649 mask |= (d->perm[i * 4] >= 32) << i;
42650 vmode = V8SImode;
42651 goto do_subreg;
42653 case V16HImode:
42654 /* See if words move in pairs. If yes, vpblendd can be used. */
42655 for (i = 0; i < 16; i += 2)
42656 if (d->perm[i] + 1 != d->perm[i + 1])
42657 break;
42658 if (i < 16)
42660 /* See if words move the same in both lanes. If not,
42661 vpblendvb must be used. */
42662 for (i = 0; i < 8; i++)
42663 if (d->perm[i] + 8 != d->perm[i + 8])
42665 /* Use vpblendvb. */
42666 for (i = 0; i < 32; ++i)
42667 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42669 vmode = V32QImode;
42670 nelt = 32;
42671 target = gen_reg_rtx (vmode);
42672 op0 = gen_lowpart (vmode, op0);
42673 op1 = gen_lowpart (vmode, op1);
42674 goto finish_pblendvb;
42677 /* Use vpblendw. */
42678 for (i = 0; i < 16; ++i)
42679 mask |= (d->perm[i] >= 16) << i;
42680 break;
42683 /* Use vpblendd. */
42684 for (i = 0; i < 8; ++i)
42685 mask |= (d->perm[i * 2] >= 16) << i;
42686 vmode = V8SImode;
42687 goto do_subreg;
42689 case V4DImode:
42690 /* Use vpblendd. */
42691 for (i = 0; i < 4; ++i)
42692 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42693 vmode = V8SImode;
42694 goto do_subreg;
42696 default:
42697 gcc_unreachable ();
42700 /* This matches five different patterns with the different modes. */
42701 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42702 x = gen_rtx_SET (VOIDmode, target, x);
42703 emit_insn (x);
42704 if (target != d->target)
42705 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42707 return true;
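/* For illustration, how the blend immediate falls out of the
   permutation in the simple (non-subreg) cases above: a permutation is
   a blend when element i comes from position i of either operand, and
   bit i of the immediate says "take it from op1".  A sketch only; the
   helper name is made up.  */
#if 0
static int
sketch_blend_mask (const unsigned char *perm, unsigned nelt)
{
  unsigned i;
  int mask = 0;

  for (i = 0; i < nelt; ++i)
    {
      if (perm[i] != i && perm[i] != i + nelt)
        return -1;                     /* not expressible as a blend */
      mask |= (perm[i] >= nelt) << i;
    }
  /* e.g. nelt == 4 and perm = { 0, 5, 2, 7 } gives mask 0b1010.  */
  return mask;
}
#endif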
42710 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42711 in terms of the variable form of vpermilps.
42713 Note that we will have already failed the immediate input vpermilps,
42714 which requires that the high and low part shuffle be identical; the
42715 variable form doesn't require that. */
42717 static bool
42718 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42720 rtx rperm[8], vperm;
42721 unsigned i;
42723 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42724 return false;
42726 /* We can only permute within the 128-bit lane. */
42727 for (i = 0; i < 8; ++i)
42729 unsigned e = d->perm[i];
42730 if (i < 4 ? e >= 4 : e < 4)
42731 return false;
42734 if (d->testing_p)
42735 return true;
42737 for (i = 0; i < 8; ++i)
42739 unsigned e = d->perm[i];
42741 /* Within each 128-bit lane, the elements of op0 are numbered
42742 from 0 and the elements of op1 are numbered from 4. */
42743 if (e >= 8 + 4)
42744 e -= 8;
42745 else if (e >= 4)
42746 e -= 4;
42748 rperm[i] = GEN_INT (e);
42751 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42752 vperm = force_reg (V8SImode, vperm);
42753 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42755 return true;
42758 /* Return true if permutation D can be performed as VMODE permutation
42759 instead. */
42761 static bool
42762 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42764 unsigned int i, j, chunk;
42766 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42767 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42768 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42769 return false;
42771 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42772 return true;
42774 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42775 for (i = 0; i < d->nelt; i += chunk)
42776 if (d->perm[i] & (chunk - 1))
42777 return false;
42778 else
42779 for (j = 1; j < chunk; ++j)
42780 if (d->perm[i] + j != d->perm[i + j])
42781 return false;
42783 return true;
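/* An example of the chunk test above (illustrative data only): with
   d->vmode == V16QImode and vmode == V4SImode, chunk == 4, so the byte
   permutation below moves whole 4-byte groups and can instead be done
   as the V4SI permutation { 2, 0, 3, 1 }.  */
#if 0
static const unsigned char byte_perm_valid_as_v4si[16] = {
  8, 9, 10, 11,      /* dword 2 */
  0, 1, 2, 3,        /* dword 0 */
  12, 13, 14, 15,    /* dword 3 */
  4, 5, 6, 7         /* dword 1 */
};
#endif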
42786 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42787 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42789 static bool
42790 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42792 unsigned i, nelt, eltsz, mask;
42793 unsigned char perm[32];
42794 enum machine_mode vmode = V16QImode;
42795 rtx rperm[32], vperm, target, op0, op1;
42797 nelt = d->nelt;
42799 if (!d->one_operand_p)
42801 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42803 if (TARGET_AVX2
42804 && valid_perm_using_mode_p (V2TImode, d))
42806 if (d->testing_p)
42807 return true;
42809 /* Use vperm2i128 insn. The pattern uses
42810 V4DImode instead of V2TImode. */
42811 target = d->target;
42812 if (d->vmode != V4DImode)
42813 target = gen_reg_rtx (V4DImode);
42814 op0 = gen_lowpart (V4DImode, d->op0);
42815 op1 = gen_lowpart (V4DImode, d->op1);
42816 rperm[0]
42817 = GEN_INT ((d->perm[0] / (nelt / 2))
42818 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42819 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42820 if (target != d->target)
42821 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42822 return true;
42824 return false;
42827 else
42829 if (GET_MODE_SIZE (d->vmode) == 16)
42831 if (!TARGET_SSSE3)
42832 return false;
42834 else if (GET_MODE_SIZE (d->vmode) == 32)
42836 if (!TARGET_AVX2)
42837 return false;
42839 /* V4DImode should already be handled through
42840 expand_vselect by the vpermq instruction. */
42841 gcc_assert (d->vmode != V4DImode);
42843 vmode = V32QImode;
42844 if (d->vmode == V8SImode
42845 || d->vmode == V16HImode
42846 || d->vmode == V32QImode)
42848 /* First see if vpermq can be used for
42849 V8SImode/V16HImode/V32QImode. */
42850 if (valid_perm_using_mode_p (V4DImode, d))
42852 for (i = 0; i < 4; i++)
42853 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42854 if (d->testing_p)
42855 return true;
42856 target = gen_reg_rtx (V4DImode);
42857 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42858 perm, 4, false))
42860 emit_move_insn (d->target,
42861 gen_lowpart (d->vmode, target));
42862 return true;
42864 return false;
42867 /* Next see if vpermd can be used. */
42868 if (valid_perm_using_mode_p (V8SImode, d))
42869 vmode = V8SImode;
42871 /* Or if vpermps can be used. */
42872 else if (d->vmode == V8SFmode)
42873 vmode = V8SImode;
42875 if (vmode == V32QImode)
42877 /* vpshufb only works within a 128-bit lane; it is not
42878 possible to shuffle bytes across the lanes. */
42879 for (i = 0; i < nelt; ++i)
42880 if ((d->perm[i] ^ i) & (nelt / 2))
42881 return false;
42884 else
42885 return false;
42888 if (d->testing_p)
42889 return true;
42891 if (vmode == V8SImode)
42892 for (i = 0; i < 8; ++i)
42893 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42894 else
42896 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42897 if (!d->one_operand_p)
42898 mask = 2 * nelt - 1;
42899 else if (vmode == V16QImode)
42900 mask = nelt - 1;
42901 else
42902 mask = nelt / 2 - 1;
42904 for (i = 0; i < nelt; ++i)
42906 unsigned j, e = d->perm[i] & mask;
42907 for (j = 0; j < eltsz; ++j)
42908 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42912 vperm = gen_rtx_CONST_VECTOR (vmode,
42913 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42914 vperm = force_reg (vmode, vperm);
42916 target = d->target;
42917 if (d->vmode != vmode)
42918 target = gen_reg_rtx (vmode);
42919 op0 = gen_lowpart (vmode, d->op0);
42920 if (d->one_operand_p)
42922 if (vmode == V16QImode)
42923 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42924 else if (vmode == V32QImode)
42925 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42926 else if (vmode == V8SFmode)
42927 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42928 else
42929 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42931 else
42933 op1 = gen_lowpart (vmode, d->op1);
42934 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42936 if (target != d->target)
42937 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42939 return true;
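/* For reference, the 128-bit single-operand case above at the
   intrinsic level: the control vector holds, for every result byte,
   the index of the source byte (setting bit 7 would zero the byte
   instead).  A sketch only, requiring SSSE3; the helper name is made
   up.  */
#if 0
#include <tmmintrin.h>

static __m128i
sketch_pshufb_reverse_bytes (__m128i x)
{
  const __m128i ctrl = _mm_setr_epi8 (15, 14, 13, 12, 11, 10, 9, 8,
                                      7, 6, 5, 4, 3, 2, 1, 0);
  return _mm_shuffle_epi8 (x, ctrl);   /* result byte i = x byte ctrl[i] */
}
#endif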
42942 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42943 in a single instruction. */
42945 static bool
42946 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42948 unsigned i, nelt = d->nelt;
42949 unsigned char perm2[MAX_VECT_LEN];
42951 /* Check plain VEC_SELECT first, because AVX has instructions that could
42952 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42953 input where SEL+CONCAT may not. */
42954 if (d->one_operand_p)
42956 int mask = nelt - 1;
42957 bool identity_perm = true;
42958 bool broadcast_perm = true;
42960 for (i = 0; i < nelt; i++)
42962 perm2[i] = d->perm[i] & mask;
42963 if (perm2[i] != i)
42964 identity_perm = false;
42965 if (perm2[i])
42966 broadcast_perm = false;
42969 if (identity_perm)
42971 if (!d->testing_p)
42972 emit_move_insn (d->target, d->op0);
42973 return true;
42975 else if (broadcast_perm && TARGET_AVX2)
42977 /* Use vpbroadcast{b,w,d}. */
42978 rtx (*gen) (rtx, rtx) = NULL;
42979 switch (d->vmode)
42981 case V32QImode:
42982 gen = gen_avx2_pbroadcastv32qi_1;
42983 break;
42984 case V16HImode:
42985 gen = gen_avx2_pbroadcastv16hi_1;
42986 break;
42987 case V8SImode:
42988 gen = gen_avx2_pbroadcastv8si_1;
42989 break;
42990 case V16QImode:
42991 gen = gen_avx2_pbroadcastv16qi;
42992 break;
42993 case V8HImode:
42994 gen = gen_avx2_pbroadcastv8hi;
42995 break;
42996 case V8SFmode:
42997 gen = gen_avx2_vec_dupv8sf_1;
42998 break;
42999 /* For other modes prefer other shuffles this function creates. */
43000 default: break;
43002 if (gen != NULL)
43004 if (!d->testing_p)
43005 emit_insn (gen (d->target, d->op0));
43006 return true;
43010 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43011 return true;
43013 /* There are plenty of patterns in sse.md that are written for
43014 SEL+CONCAT and are not replicated for a single op. Perhaps
43015 that should be changed, to avoid the nastiness here. */
43017 /* Recognize interleave style patterns, which means incrementing
43018 every other permutation operand. */
43019 for (i = 0; i < nelt; i += 2)
43021 perm2[i] = d->perm[i] & mask;
43022 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43024 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43025 d->testing_p))
43026 return true;
43028 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43029 if (nelt >= 4)
43031 for (i = 0; i < nelt; i += 4)
43033 perm2[i + 0] = d->perm[i + 0] & mask;
43034 perm2[i + 1] = d->perm[i + 1] & mask;
43035 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43036 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43039 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43040 d->testing_p))
43041 return true;
43045 /* Finally, try the fully general two operand permute. */
43046 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43047 d->testing_p))
43048 return true;
43050 /* Recognize interleave style patterns with reversed operands. */
43051 if (!d->one_operand_p)
43053 for (i = 0; i < nelt; ++i)
43055 unsigned e = d->perm[i];
43056 if (e >= nelt)
43057 e -= nelt;
43058 else
43059 e += nelt;
43060 perm2[i] = e;
43063 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43064 d->testing_p))
43065 return true;
43068 /* Try the SSE4.1 blend variable merge instructions. */
43069 if (expand_vec_perm_blend (d))
43070 return true;
43072 /* Try one of the AVX vpermil variable permutations. */
43073 if (expand_vec_perm_vpermil (d))
43074 return true;
43076 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43077 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43078 if (expand_vec_perm_pshufb (d))
43079 return true;
43081 /* Try the AVX512F vpermi2 instructions. */
43082 rtx vec[64];
43083 enum machine_mode mode = d->vmode;
43084 if (mode == V8DFmode)
43085 mode = V8DImode;
43086 else if (mode == V16SFmode)
43087 mode = V16SImode;
43088 for (i = 0; i < nelt; ++i)
43089 vec[i] = GEN_INT (d->perm[i]);
43090 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43091 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43092 return true;
43094 return false;
43097 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43098 in terms of a pair of pshuflw + pshufhw instructions. */
43100 static bool
43101 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43103 unsigned char perm2[MAX_VECT_LEN];
43104 unsigned i;
43105 bool ok;
43107 if (d->vmode != V8HImode || !d->one_operand_p)
43108 return false;
43110 /* The two permutations only operate in 64-bit lanes. */
43111 for (i = 0; i < 4; ++i)
43112 if (d->perm[i] >= 4)
43113 return false;
43114 for (i = 4; i < 8; ++i)
43115 if (d->perm[i] < 4)
43116 return false;
43118 if (d->testing_p)
43119 return true;
43121 /* Emit the pshuflw. */
43122 memcpy (perm2, d->perm, 4);
43123 for (i = 4; i < 8; ++i)
43124 perm2[i] = i;
43125 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43126 gcc_assert (ok);
43128 /* Emit the pshufhw. */
43129 memcpy (perm2 + 4, d->perm + 4, 4);
43130 for (i = 0; i < 4; ++i)
43131 perm2[i] = i;
43132 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43133 gcc_assert (ok);
43135 return true;
43138 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43139 the permutation using the SSSE3 palignr instruction. This succeeds
43140 when all of the elements in PERM fit within one vector and we merely
43141 need to shift them down so that a single vector permutation has a
43142 chance to succeed. */
43144 static bool
43145 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43147 unsigned i, nelt = d->nelt;
43148 unsigned min, max;
43149 bool in_order, ok;
43150 rtx shift, target;
43151 struct expand_vec_perm_d dcopy;
43153 /* Even with AVX, palignr only operates on 128-bit vectors. */
43154 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43155 return false;
43157 min = nelt, max = 0;
43158 for (i = 0; i < nelt; ++i)
43160 unsigned e = d->perm[i];
43161 if (e < min)
43162 min = e;
43163 if (e > max)
43164 max = e;
43166 if (min == 0 || max - min >= nelt)
43167 return false;
43169 /* Given that we have SSSE3, we know we'll be able to implement the
43170 single operand permutation after the palignr with pshufb. */
43171 if (d->testing_p)
43172 return true;
43174 dcopy = *d;
43175 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43176 target = gen_reg_rtx (TImode);
43177 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43178 gen_lowpart (TImode, d->op0), shift));
43180 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43181 dcopy.one_operand_p = true;
43183 in_order = true;
43184 for (i = 0; i < nelt; ++i)
43186 unsigned e = dcopy.perm[i] - min;
43187 if (e != i)
43188 in_order = false;
43189 dcopy.perm[i] = e;
43192 /* Test for the degenerate case where the alignment by itself
43193 produces the desired permutation. */
43194 if (in_order)
43196 emit_move_insn (d->target, dcopy.op0);
43197 return true;
43200 ok = expand_vec_perm_1 (&dcopy);
43201 gcc_assert (ok);
43203 return ok;
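/* For reference, the palignr reduction above at the intrinsic level:
   once every selected element lies in a window of nelt consecutive
   positions starting at "min", shifting the concatenation { op1:op0 }
   down by min elements leaves a single-operand permutation.  A sketch
   only, requiring SSSE3; the example assumes a V8HI permutation whose
   indices all lie in [3, 10], hence a 3 * 2 == 6 byte shift.  */
#if 0
#include <tmmintrin.h>

static __m128i
sketch_palignr_window (__m128i op0, __m128i op1)
{
  /* Bytes 6..21 of the 32-byte pair, i.e. 16-bit elements 3..10.  */
  return _mm_alignr_epi8 (op1, op0, 6);
}
#endif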
43206 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43208 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43209 a two vector permutation into a single vector permutation by using
43210 an interleave operation to merge the vectors. */
43212 static bool
43213 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43215 struct expand_vec_perm_d dremap, dfinal;
43216 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43217 unsigned HOST_WIDE_INT contents;
43218 unsigned char remap[2 * MAX_VECT_LEN];
43219 rtx seq;
43220 bool ok, same_halves = false;
43222 if (GET_MODE_SIZE (d->vmode) == 16)
43224 if (d->one_operand_p)
43225 return false;
43227 else if (GET_MODE_SIZE (d->vmode) == 32)
43229 if (!TARGET_AVX)
43230 return false;
43231 /* For 32-byte modes allow even d->one_operand_p.
43232 The lack of cross-lane shuffling in some instructions
43233 might prevent a single insn shuffle. */
43234 dfinal = *d;
43235 dfinal.testing_p = true;
43236 /* If expand_vec_perm_interleave3 can expand this into
43237 a 3 insn sequence, give up and let it be expanded as
43238 a 3 insn sequence. While that is one insn longer,
43239 it doesn't need a memory operand, and in the common
43240 case where the interleave low and high permutations
43241 with the same operands are adjacent, it needs only 4
43242 insns for both after CSE. */
43243 if (expand_vec_perm_interleave3 (&dfinal))
43244 return false;
43246 else
43247 return false;
43249 /* Examine from whence the elements come. */
43250 contents = 0;
43251 for (i = 0; i < nelt; ++i)
43252 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43254 memset (remap, 0xff, sizeof (remap));
43255 dremap = *d;
43257 if (GET_MODE_SIZE (d->vmode) == 16)
43259 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43261 /* Split the two input vectors into 4 halves. */
43262 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43263 h2 = h1 << nelt2;
43264 h3 = h2 << nelt2;
43265 h4 = h3 << nelt2;
43267 /* If all the elements come from the low halves, use interleave low;
43268 similarly for interleave high. If the elements are from mis-matched
43269 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43270 if ((contents & (h1 | h3)) == contents)
43272 /* punpckl* */
43273 for (i = 0; i < nelt2; ++i)
43275 remap[i] = i * 2;
43276 remap[i + nelt] = i * 2 + 1;
43277 dremap.perm[i * 2] = i;
43278 dremap.perm[i * 2 + 1] = i + nelt;
43280 if (!TARGET_SSE2 && d->vmode == V4SImode)
43281 dremap.vmode = V4SFmode;
43283 else if ((contents & (h2 | h4)) == contents)
43285 /* punpckh* */
43286 for (i = 0; i < nelt2; ++i)
43288 remap[i + nelt2] = i * 2;
43289 remap[i + nelt + nelt2] = i * 2 + 1;
43290 dremap.perm[i * 2] = i + nelt2;
43291 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43293 if (!TARGET_SSE2 && d->vmode == V4SImode)
43294 dremap.vmode = V4SFmode;
43296 else if ((contents & (h1 | h4)) == contents)
43298 /* shufps */
43299 for (i = 0; i < nelt2; ++i)
43301 remap[i] = i;
43302 remap[i + nelt + nelt2] = i + nelt2;
43303 dremap.perm[i] = i;
43304 dremap.perm[i + nelt2] = i + nelt + nelt2;
43306 if (nelt != 4)
43308 /* shufpd */
43309 dremap.vmode = V2DImode;
43310 dremap.nelt = 2;
43311 dremap.perm[0] = 0;
43312 dremap.perm[1] = 3;
43315 else if ((contents & (h2 | h3)) == contents)
43317 /* shufps */
43318 for (i = 0; i < nelt2; ++i)
43320 remap[i + nelt2] = i;
43321 remap[i + nelt] = i + nelt2;
43322 dremap.perm[i] = i + nelt2;
43323 dremap.perm[i + nelt2] = i + nelt;
43325 if (nelt != 4)
43327 /* shufpd */
43328 dremap.vmode = V2DImode;
43329 dremap.nelt = 2;
43330 dremap.perm[0] = 1;
43331 dremap.perm[1] = 2;
43334 else
43335 return false;
43337 else
43339 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43340 unsigned HOST_WIDE_INT q[8];
43341 unsigned int nonzero_halves[4];
43343 /* Split the two input vectors into 8 quarters. */
43344 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43345 for (i = 1; i < 8; ++i)
43346 q[i] = q[0] << (nelt4 * i);
43347 for (i = 0; i < 4; ++i)
43348 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43350 nonzero_halves[nzcnt] = i;
43351 ++nzcnt;
43354 if (nzcnt == 1)
43356 gcc_assert (d->one_operand_p);
43357 nonzero_halves[1] = nonzero_halves[0];
43358 same_halves = true;
43360 else if (d->one_operand_p)
43362 gcc_assert (nonzero_halves[0] == 0);
43363 gcc_assert (nonzero_halves[1] == 1);
43366 if (nzcnt <= 2)
43368 if (d->perm[0] / nelt2 == nonzero_halves[1])
43370 /* Attempt to increase the likelihood that the dfinal
43371 shuffle will be intra-lane. */
43372 char tmph = nonzero_halves[0];
43373 nonzero_halves[0] = nonzero_halves[1];
43374 nonzero_halves[1] = tmph;
43377 /* vperm2f128 or vperm2i128. */
43378 for (i = 0; i < nelt2; ++i)
43380 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43381 remap[i + nonzero_halves[0] * nelt2] = i;
43382 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43383 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43386 if (d->vmode != V8SFmode
43387 && d->vmode != V4DFmode
43388 && d->vmode != V8SImode)
43390 dremap.vmode = V8SImode;
43391 dremap.nelt = 8;
43392 for (i = 0; i < 4; ++i)
43394 dremap.perm[i] = i + nonzero_halves[0] * 4;
43395 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43399 else if (d->one_operand_p)
43400 return false;
43401 else if (TARGET_AVX2
43402 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43404 /* vpunpckl* */
43405 for (i = 0; i < nelt4; ++i)
43407 remap[i] = i * 2;
43408 remap[i + nelt] = i * 2 + 1;
43409 remap[i + nelt2] = i * 2 + nelt2;
43410 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43411 dremap.perm[i * 2] = i;
43412 dremap.perm[i * 2 + 1] = i + nelt;
43413 dremap.perm[i * 2 + nelt2] = i + nelt2;
43414 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43417 else if (TARGET_AVX2
43418 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43420 /* vpunpckh* */
43421 for (i = 0; i < nelt4; ++i)
43423 remap[i + nelt4] = i * 2;
43424 remap[i + nelt + nelt4] = i * 2 + 1;
43425 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43426 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43427 dremap.perm[i * 2] = i + nelt4;
43428 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43429 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43430 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43433 else
43434 return false;
43437 /* Use the remapping array set up above to move the elements from their
43438 swizzled locations into their final destinations. */
43439 dfinal = *d;
43440 for (i = 0; i < nelt; ++i)
43442 unsigned e = remap[d->perm[i]];
43443 gcc_assert (e < nelt);
43444 /* If same_halves is true, both halves of the remapped vector are the
43445 same. Avoid cross-lane accesses if possible. */
43446 if (same_halves && i >= nelt2)
43448 gcc_assert (e < nelt2);
43449 dfinal.perm[i] = e + nelt2;
43451 else
43452 dfinal.perm[i] = e;
43454 if (!d->testing_p)
43456 dremap.target = gen_reg_rtx (dremap.vmode);
43457 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43459 dfinal.op1 = dfinal.op0;
43460 dfinal.one_operand_p = true;
43462 /* Test if the final remap can be done with a single insn. For V4SFmode or
43463 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43464 start_sequence ();
43465 ok = expand_vec_perm_1 (&dfinal);
43466 seq = get_insns ();
43467 end_sequence ();
43469 if (!ok)
43470 return false;
43472 if (d->testing_p)
43473 return true;
43475 if (dremap.vmode != dfinal.vmode)
43477 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43478 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43481 ok = expand_vec_perm_1 (&dremap);
43482 gcc_assert (ok);
43484 emit_insn (seq);
43485 return true;
43488 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43489 a single vector cross-lane permutation into vpermq followed
43490 by any of the single insn permutations. */
43492 static bool
43493 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43495 struct expand_vec_perm_d dremap, dfinal;
43496 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43497 unsigned contents[2];
43498 bool ok;
43500 if (!(TARGET_AVX2
43501 && (d->vmode == V32QImode || d->vmode == V16HImode)
43502 && d->one_operand_p))
43503 return false;
43505 contents[0] = 0;
43506 contents[1] = 0;
43507 for (i = 0; i < nelt2; ++i)
43509 contents[0] |= 1u << (d->perm[i] / nelt4);
43510 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43513 for (i = 0; i < 2; ++i)
43515 unsigned int cnt = 0;
43516 for (j = 0; j < 4; ++j)
43517 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43518 return false;
43521 if (d->testing_p)
43522 return true;
43524 dremap = *d;
43525 dremap.vmode = V4DImode;
43526 dremap.nelt = 4;
43527 dremap.target = gen_reg_rtx (V4DImode);
43528 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43529 dremap.op1 = dremap.op0;
43530 dremap.one_operand_p = true;
43531 for (i = 0; i < 2; ++i)
43533 unsigned int cnt = 0;
43534 for (j = 0; j < 4; ++j)
43535 if ((contents[i] & (1u << j)) != 0)
43536 dremap.perm[2 * i + cnt++] = j;
43537 for (; cnt < 2; ++cnt)
43538 dremap.perm[2 * i + cnt] = 0;
43541 dfinal = *d;
43542 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43543 dfinal.op1 = dfinal.op0;
43544 dfinal.one_operand_p = true;
43545 for (i = 0, j = 0; i < nelt; ++i)
43547 if (i == nelt2)
43548 j = 2;
43549 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43550 if ((d->perm[i] / nelt4) == dremap.perm[j])
43552 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43553 dfinal.perm[i] |= nelt4;
43554 else
43555 gcc_unreachable ();
43558 ok = expand_vec_perm_1 (&dremap);
43559 gcc_assert (ok);
43561 ok = expand_vec_perm_1 (&dfinal);
43562 gcc_assert (ok);
43564 return true;
43567 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43568 a vector permutation using two instructions, vperm2f128 resp.
43569 vperm2i128 followed by any single in-lane permutation. */
43571 static bool
43572 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43574 struct expand_vec_perm_d dfirst, dsecond;
43575 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43576 bool ok;
43578 if (!TARGET_AVX
43579 || GET_MODE_SIZE (d->vmode) != 32
43580 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43581 return false;
43583 dsecond = *d;
43584 dsecond.one_operand_p = false;
43585 dsecond.testing_p = true;
43587 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43588 immediate. For perm < 16 the second permutation uses
43589 d->op0 as first operand, for perm >= 16 it uses d->op1
43590 as first operand. The second operand is the result of
43591 vperm2[fi]128. */
43592 for (perm = 0; perm < 32; perm++)
43594 /* Ignore permutations which do not move anything cross-lane. */
43595 if (perm < 16)
43597 /* The second shuffle for e.g. V4DFmode has
43598 0123 and ABCD operands.
43599 Ignore AB23, as 23 is already in the second lane
43600 of the first operand. */
43601 if ((perm & 0xc) == (1 << 2)) continue;
43602 /* And 01CD, as 01 is in the first lane of the first
43603 operand. */
43604 if ((perm & 3) == 0) continue;
43605 /* And 4567, as then the vperm2[fi]128 doesn't change
43606 anything on the original 4567 second operand. */
43607 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43609 else
43611 /* The second shuffle for e.g. V4DFmode has
43612 4567 and ABCD operands.
43613 Ignore AB67, as 67 is already in the second lane
43614 of the first operand. */
43615 if ((perm & 0xc) == (3 << 2)) continue;
43616 /* And 45CD, as 45 is in the first lane of the first
43617 operand. */
43618 if ((perm & 3) == 2) continue;
43619 /* And 0123, as then the vperm2[fi]128 doesn't change
43620 anything on the original 0123 first operand. */
43621 if ((perm & 0xf) == (1 << 2)) continue;
43624 for (i = 0; i < nelt; i++)
43626 j = d->perm[i] / nelt2;
43627 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43628 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43629 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43630 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43631 else
43632 break;
43635 if (i == nelt)
43637 start_sequence ();
43638 ok = expand_vec_perm_1 (&dsecond);
43639 end_sequence ();
43641 else
43642 ok = false;
43644 if (ok)
43646 if (d->testing_p)
43647 return true;
43649 /* Found a usable second shuffle. dfirst will be
43650 vperm2f128 on d->op0 and d->op1. */
43651 dsecond.testing_p = false;
43652 dfirst = *d;
43653 dfirst.target = gen_reg_rtx (d->vmode);
43654 for (i = 0; i < nelt; i++)
43655 dfirst.perm[i] = (i & (nelt2 - 1))
43656 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43658 ok = expand_vec_perm_1 (&dfirst);
43659 gcc_assert (ok);
43661 /* And dsecond is some single insn shuffle, taking
43662 d->op0 and result of vperm2f128 (if perm < 16) or
43663 d->op1 and result of vperm2f128 (otherwise). */
43664 dsecond.op1 = dfirst.target;
43665 if (perm >= 16)
43666 dsecond.op0 = dfirst.op1;
43668 ok = expand_vec_perm_1 (&dsecond);
43669 gcc_assert (ok);
43671 return true;
43674 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43675 if (d->one_operand_p)
43676 return false;
43679 return false;
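/* For reference, the vperm2[fi]128 immediate used above: the low
   nibble selects the source of the result's low 128-bit lane and the
   high nibble the source of its high lane, where selectors 0/1 name
   the lanes of the first operand and 2/3 the lanes of the second (the
   loop's 5-bit "perm" becomes ((perm << 2) | perm) & 0x33).  A sketch
   only; the helper name is made up.  */
#if 0
static unsigned
sketch_vperm2f128_imm (unsigned lo_lane_src, unsigned hi_lane_src)
{
  /* e.g. (1, 2): low lane from lane 1 of op0 and high lane from
     lane 0 of op1 gives immediate 0x21.  */
  return (hi_lane_src << 4) | lo_lane_src;
}
#endif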
43682 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43683 a two vector permutation using 2 intra-lane interleave insns
43684 and cross-lane shuffle for 32-byte vectors. */
43686 static bool
43687 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43689 unsigned i, nelt;
43690 rtx (*gen) (rtx, rtx, rtx);
43692 if (d->one_operand_p)
43693 return false;
43694 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43696 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43698 else
43699 return false;
43701 nelt = d->nelt;
43702 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43703 return false;
43704 for (i = 0; i < nelt; i += 2)
43705 if (d->perm[i] != d->perm[0] + i / 2
43706 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43707 return false;
43709 if (d->testing_p)
43710 return true;
43712 switch (d->vmode)
43714 case V32QImode:
43715 if (d->perm[0])
43716 gen = gen_vec_interleave_highv32qi;
43717 else
43718 gen = gen_vec_interleave_lowv32qi;
43719 break;
43720 case V16HImode:
43721 if (d->perm[0])
43722 gen = gen_vec_interleave_highv16hi;
43723 else
43724 gen = gen_vec_interleave_lowv16hi;
43725 break;
43726 case V8SImode:
43727 if (d->perm[0])
43728 gen = gen_vec_interleave_highv8si;
43729 else
43730 gen = gen_vec_interleave_lowv8si;
43731 break;
43732 case V4DImode:
43733 if (d->perm[0])
43734 gen = gen_vec_interleave_highv4di;
43735 else
43736 gen = gen_vec_interleave_lowv4di;
43737 break;
43738 case V8SFmode:
43739 if (d->perm[0])
43740 gen = gen_vec_interleave_highv8sf;
43741 else
43742 gen = gen_vec_interleave_lowv8sf;
43743 break;
43744 case V4DFmode:
43745 if (d->perm[0])
43746 gen = gen_vec_interleave_highv4df;
43747 else
43748 gen = gen_vec_interleave_lowv4df;
43749 break;
43750 default:
43751 gcc_unreachable ();
43754 emit_insn (gen (d->target, d->op0, d->op1));
43755 return true;
43758 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43759 a single vector permutation using a single intra-lane vector
43760 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43761 the non-swapped and swapped vectors together. */
43763 static bool
43764 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43766 struct expand_vec_perm_d dfirst, dsecond;
43767 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43768 rtx seq;
43769 bool ok;
43770 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43772 if (!TARGET_AVX
43773 || TARGET_AVX2
43774 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43775 || !d->one_operand_p)
43776 return false;
43778 dfirst = *d;
43779 for (i = 0; i < nelt; i++)
43780 dfirst.perm[i] = 0xff;
43781 for (i = 0, msk = 0; i < nelt; i++)
43783 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43784 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43785 return false;
43786 dfirst.perm[j] = d->perm[i];
43787 if (j != i)
43788 msk |= (1 << i);
43790 for (i = 0; i < nelt; i++)
43791 if (dfirst.perm[i] == 0xff)
43792 dfirst.perm[i] = i;
43794 if (!d->testing_p)
43795 dfirst.target = gen_reg_rtx (dfirst.vmode);
43797 start_sequence ();
43798 ok = expand_vec_perm_1 (&dfirst);
43799 seq = get_insns ();
43800 end_sequence ();
43802 if (!ok)
43803 return false;
43805 if (d->testing_p)
43806 return true;
43808 emit_insn (seq);
43810 dsecond = *d;
43811 dsecond.op0 = dfirst.target;
43812 dsecond.op1 = dfirst.target;
43813 dsecond.one_operand_p = true;
43814 dsecond.target = gen_reg_rtx (dsecond.vmode);
43815 for (i = 0; i < nelt; i++)
43816 dsecond.perm[i] = i ^ nelt2;
43818 ok = expand_vec_perm_1 (&dsecond);
43819 gcc_assert (ok);
43821 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43822 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43823 return true;
43826 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43827 permutation using two vperm2f128, followed by a vshufpd insn blending
43828 the two vectors together. */
43830 static bool
43831 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43833 struct expand_vec_perm_d dfirst, dsecond, dthird;
43834 bool ok;
43836 if (!TARGET_AVX || (d->vmode != V4DFmode))
43837 return false;
43839 if (d->testing_p)
43840 return true;
43842 dfirst = *d;
43843 dsecond = *d;
43844 dthird = *d;
43846 dfirst.perm[0] = (d->perm[0] & ~1);
43847 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43848 dfirst.perm[2] = (d->perm[2] & ~1);
43849 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43850 dsecond.perm[0] = (d->perm[1] & ~1);
43851 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43852 dsecond.perm[2] = (d->perm[3] & ~1);
43853 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43854 dthird.perm[0] = (d->perm[0] % 2);
43855 dthird.perm[1] = (d->perm[1] % 2) + 4;
43856 dthird.perm[2] = (d->perm[2] % 2) + 2;
43857 dthird.perm[3] = (d->perm[3] % 2) + 6;
43859 dfirst.target = gen_reg_rtx (dfirst.vmode);
43860 dsecond.target = gen_reg_rtx (dsecond.vmode);
43861 dthird.op0 = dfirst.target;
43862 dthird.op1 = dsecond.target;
43863 dthird.one_operand_p = false;
43865 canonicalize_perm (&dfirst);
43866 canonicalize_perm (&dsecond);
43868 ok = expand_vec_perm_1 (&dfirst)
43869 && expand_vec_perm_1 (&dsecond)
43870 && expand_vec_perm_1 (&dthird);
43872 gcc_assert (ok);
43874 return true;
43877 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43878 permutation with two pshufb insns and an ior. We should have already
43879 failed all two instruction sequences. */
43881 static bool
43882 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43884 rtx rperm[2][16], vperm, l, h, op, m128;
43885 unsigned int i, nelt, eltsz;
43887 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43888 return false;
43889 gcc_assert (!d->one_operand_p);
43891 if (d->testing_p)
43892 return true;
43894 nelt = d->nelt;
43895 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43897 /* Generate two permutation masks. If the required element is within
43898 the given vector it is shuffled into the proper lane. If the required
43899 element is in the other vector, force a zero into the lane by setting
43900 bit 7 in the permutation mask. */
43901 m128 = GEN_INT (-128);
43902 for (i = 0; i < nelt; ++i)
43904 unsigned j, e = d->perm[i];
43905 unsigned which = (e >= nelt);
43906 if (e >= nelt)
43907 e -= nelt;
43909 for (j = 0; j < eltsz; ++j)
43911 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43912 rperm[1-which][i*eltsz + j] = m128;
43916 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43917 vperm = force_reg (V16QImode, vperm);
43919 l = gen_reg_rtx (V16QImode);
43920 op = gen_lowpart (V16QImode, d->op0);
43921 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43923 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43924 vperm = force_reg (V16QImode, vperm);
43926 h = gen_reg_rtx (V16QImode);
43927 op = gen_lowpart (V16QImode, d->op1);
43928 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43930 op = d->target;
43931 if (d->vmode != V16QImode)
43932 op = gen_reg_rtx (V16QImode);
43933 emit_insn (gen_iorv16qi3 (op, l, h));
43934 if (op != d->target)
43935 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43937 return true;
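/* For reference, the two-mask sequence above at the intrinsic level:
   each mask keeps the bytes that live in "its" operand and zeroes the
   rest (index -128, i.e. bit 7 set), so ORing the two pshufb results
   assembles the final vector.  A sketch only, requiring SSSE3; names
   are made up.  */
#if 0
#include <tmmintrin.h>

static __m128i
sketch_pshufb2 (__m128i op0, __m128i op1,
                const signed char sel0[16], const signed char sel1[16])
{
  /* sel0[i] is a byte index into op0 or -128 to zero the lane;
     sel1[i] likewise for op1; exactly one of the two is >= 0.  */
  __m128i l = _mm_shuffle_epi8 (op0, _mm_loadu_si128 ((const __m128i *) sel0));
  __m128i h = _mm_shuffle_epi8 (op1, _mm_loadu_si128 ((const __m128i *) sel1));
  return _mm_or_si128 (l, h);
}
#endif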
43940 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
43941 with two vpshufb insns, vpermq and vpor. We should have already failed
43942 all two or three instruction sequences. */
43944 static bool
43945 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43947 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43948 unsigned int i, nelt, eltsz;
43950 if (!TARGET_AVX2
43951 || !d->one_operand_p
43952 || (d->vmode != V32QImode && d->vmode != V16HImode))
43953 return false;
43955 if (d->testing_p)
43956 return true;
43958 nelt = d->nelt;
43959 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43961 /* Generate two permutation masks. If the required element is within
43962 the same lane, it is shuffled in. If the required element is from the
43963 other lane, force a zero by setting bit 7 in the permutation mask.
43964 The other mask has a non-negative element wherever the element
43965 is requested from the other lane, but moved to the other lane as
43966 well, so that the result of vpshufb can have the two V2TImode halves
43967 swapped. */
43968 m128 = GEN_INT (-128);
43969 for (i = 0; i < nelt; ++i)
43971 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43972 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43974 for (j = 0; j < eltsz; ++j)
43976 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43977 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43981 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43982 vperm = force_reg (V32QImode, vperm);
43984 h = gen_reg_rtx (V32QImode);
43985 op = gen_lowpart (V32QImode, d->op0);
43986 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43988 /* Swap the 128-bit lanes of h into hp. */
43989 hp = gen_reg_rtx (V4DImode);
43990 op = gen_lowpart (V4DImode, h);
43991 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43992 const1_rtx));
43994 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43995 vperm = force_reg (V32QImode, vperm);
43997 l = gen_reg_rtx (V32QImode);
43998 op = gen_lowpart (V32QImode, d->op0);
43999 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44001 op = d->target;
44002 if (d->vmode != V32QImode)
44003 op = gen_reg_rtx (V32QImode);
44004 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44005 if (op != d->target)
44006 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44008 return true;
44011 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44012 and extract-odd permutations of two V32QImode and V16QImode operand
44013 with two vpshufb insns, vpor and vpermq. We should have already
44014 failed all two or three instruction sequences. */
44016 static bool
44017 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44019 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44020 unsigned int i, nelt, eltsz;
44022 if (!TARGET_AVX2
44023 || d->one_operand_p
44024 || (d->vmode != V32QImode && d->vmode != V16HImode))
44025 return false;
44027 for (i = 0; i < d->nelt; ++i)
44028 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44029 return false;
44031 if (d->testing_p)
44032 return true;
44034 nelt = d->nelt;
44035 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44037 /* Generate two permutation masks. In the first permutation mask
44038 the first quarter will contain indexes for the first half
44039 of op0, the second quarter will contain bit 7 set, the third
44040 quarter will contain indexes for the second half of op0 and the
44041 last quarter bit 7 set. In the second permutation mask
44042 the first quarter will contain bit 7 set, the second quarter
44043 indexes for the first half of op1, the third quarter bit 7 set
44044 and the last quarter indexes for the second half of op1.
44045 I.e. the first mask e.g. for V32QImode extract even will be:
44046 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44047 (all values masked with 0xf except for -128) and second mask
44048 for extract even will be
44049 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44050 m128 = GEN_INT (-128);
44051 for (i = 0; i < nelt; ++i)
44053 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44054 unsigned which = d->perm[i] >= nelt;
44055 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44057 for (j = 0; j < eltsz; ++j)
44059 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44060 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44064 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44065 vperm = force_reg (V32QImode, vperm);
44067 l = gen_reg_rtx (V32QImode);
44068 op = gen_lowpart (V32QImode, d->op0);
44069 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44071 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44072 vperm = force_reg (V32QImode, vperm);
44074 h = gen_reg_rtx (V32QImode);
44075 op = gen_lowpart (V32QImode, d->op1);
44076 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44078 ior = gen_reg_rtx (V32QImode);
44079 emit_insn (gen_iorv32qi3 (ior, l, h));
44081 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44082 op = gen_reg_rtx (V4DImode);
44083 ior = gen_lowpart (V4DImode, ior);
44084 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44085 const1_rtx, GEN_INT (3)));
44086 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44088 return true;
44091 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44092 and extract-odd permutations. */
44094 static bool
44095 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44097 rtx t1, t2, t3, t4, t5;
44099 switch (d->vmode)
44101 case V4DFmode:
44102 if (d->testing_p)
44103 break;
44104 t1 = gen_reg_rtx (V4DFmode);
44105 t2 = gen_reg_rtx (V4DFmode);
44107 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44108 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44109 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44111 /* Now an unpck[lh]pd will produce the result required. */
44112 if (odd)
44113 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44114 else
44115 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44116 emit_insn (t3);
44117 break;
44119 case V8SFmode:
44121 int mask = odd ? 0xdd : 0x88;
44123 if (d->testing_p)
44124 break;
44125 t1 = gen_reg_rtx (V8SFmode);
44126 t2 = gen_reg_rtx (V8SFmode);
44127 t3 = gen_reg_rtx (V8SFmode);
44129 /* Shuffle within the 128-bit lanes to produce:
44130 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44131 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44132 GEN_INT (mask)));
44134 /* Shuffle the lanes around to produce:
44135 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44136 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44137 GEN_INT (0x3)));
44139 /* Shuffle within the 128-bit lanes to produce:
44140 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44141 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44143 /* Shuffle within the 128-bit lanes to produce:
44144 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44145 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44147 /* Shuffle the lanes around to produce:
44148 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44149 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44150 GEN_INT (0x20)));
44152 break;
44154 case V2DFmode:
44155 case V4SFmode:
44156 case V2DImode:
44157 case V4SImode:
44158 /* These are always directly implementable by expand_vec_perm_1. */
44159 gcc_unreachable ();
44161 case V8HImode:
44162 if (TARGET_SSSE3)
44163 return expand_vec_perm_pshufb2 (d);
44164 else
44166 if (d->testing_p)
44167 break;
44168 /* We need 2*log2(N)-1 operations to achieve odd/even
44169 with interleave. */
44170 t1 = gen_reg_rtx (V8HImode);
44171 t2 = gen_reg_rtx (V8HImode);
44172 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44173 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44174 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44175 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44176 if (odd)
44177 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44178 else
44179 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44180 emit_insn (t3);
44182 break;
44184 case V16QImode:
44185 if (TARGET_SSSE3)
44186 return expand_vec_perm_pshufb2 (d);
44187 else
44189 if (d->testing_p)
44190 break;
44191 t1 = gen_reg_rtx (V16QImode);
44192 t2 = gen_reg_rtx (V16QImode);
44193 t3 = gen_reg_rtx (V16QImode);
44194 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44195 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44196 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44197 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44198 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44199 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44200 if (odd)
44201 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44202 else
44203 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44204 emit_insn (t3);
44206 break;
44208 case V16HImode:
44209 case V32QImode:
44210 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44212 case V4DImode:
44213 if (!TARGET_AVX2)
44215 struct expand_vec_perm_d d_copy = *d;
44216 d_copy.vmode = V4DFmode;
44217 if (d->testing_p)
44218 d_copy.target = gen_lowpart (V4DFmode, d->target);
44219 else
44220 d_copy.target = gen_reg_rtx (V4DFmode);
44221 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44222 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44223 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44225 if (!d->testing_p)
44226 emit_move_insn (d->target,
44227 gen_lowpart (V4DImode, d_copy.target));
44228 return true;
44230 return false;
44233 if (d->testing_p)
44234 break;
44236 t1 = gen_reg_rtx (V4DImode);
44237 t2 = gen_reg_rtx (V4DImode);
44239 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44240 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44241 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44243 /* Now a vpunpck[lh]qdq will produce the result required. */
44244 if (odd)
44245 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44246 else
44247 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44248 emit_insn (t3);
44249 break;
44251 case V8SImode:
44252 if (!TARGET_AVX2)
44254 struct expand_vec_perm_d d_copy = *d;
44255 d_copy.vmode = V8SFmode;
44256 if (d->testing_p)
44257 d_copy.target = gen_lowpart (V8SFmode, d->target);
44258 else
44259 d_copy.target = gen_reg_rtx (V8SFmode);
44260 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44261 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44262 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44264 if (!d->testing_p)
44265 emit_move_insn (d->target,
44266 gen_lowpart (V8SImode, d_copy.target));
44267 return true;
44269 return false;
44272 if (d->testing_p)
44273 break;
44275 t1 = gen_reg_rtx (V8SImode);
44276 t2 = gen_reg_rtx (V8SImode);
44277 t3 = gen_reg_rtx (V4DImode);
44278 t4 = gen_reg_rtx (V4DImode);
44279 t5 = gen_reg_rtx (V4DImode);
44281 /* Shuffle the lanes around into
44282 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44283 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44284 gen_lowpart (V4DImode, d->op1),
44285 GEN_INT (0x20)));
44286 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44287 gen_lowpart (V4DImode, d->op1),
44288 GEN_INT (0x31)));
44290 /* Swap the 2nd and 3rd position in each lane into
44291 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44292 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44293 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44294 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44295 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44297 /* Now a vpunpck[lh]qdq will produce
44298 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44299 if (odd)
44300 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44301 gen_lowpart (V4DImode, t2));
44302 else
44303 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44304 gen_lowpart (V4DImode, t2));
44305 emit_insn (t3);
44306 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44307 break;
44309 default:
44310 gcc_unreachable ();
44313 return true;
44316 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44317 extract-even and extract-odd permutations. */
44319 static bool
44320 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44322 unsigned i, odd, nelt = d->nelt;
44324 odd = d->perm[0];
44325 if (odd != 0 && odd != 1)
44326 return false;
44328 for (i = 1; i < nelt; ++i)
44329 if (d->perm[i] != 2 * i + odd)
44330 return false;
44332 return expand_vec_perm_even_odd_1 (d, odd);
44335 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44336 permutations. We assume that expand_vec_perm_1 has already failed. */
44338 static bool
44339 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44341 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44342 enum machine_mode vmode = d->vmode;
44343 unsigned char perm2[4];
44344 rtx op0 = d->op0, dest;
44345 bool ok;
44347 switch (vmode)
44349 case V4DFmode:
44350 case V8SFmode:
44351 /* These are special-cased in sse.md so that we can optionally
44352 use the vbroadcast instruction. They expand to two insns
44353 if the input happens to be in a register. */
44354 gcc_unreachable ();
44356 case V2DFmode:
44357 case V2DImode:
44358 case V4SFmode:
44359 case V4SImode:
44360 /* These are always implementable using standard shuffle patterns. */
44361 gcc_unreachable ();
44363 case V8HImode:
44364 case V16QImode:
44365 /* These can be implemented via interleave. We save one insn by
44366 stopping once we have promoted to V4SImode and then using pshufd. */
44367 if (d->testing_p)
44368 return true;
44371 rtx dest;
44372 rtx (*gen) (rtx, rtx, rtx)
44373 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44374 : gen_vec_interleave_lowv8hi;
44376 if (elt >= nelt2)
44378 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44379 : gen_vec_interleave_highv8hi;
44380 elt -= nelt2;
44382 nelt2 /= 2;
44384 dest = gen_reg_rtx (vmode);
44385 emit_insn (gen (dest, op0, op0));
44386 vmode = get_mode_wider_vector (vmode);
44387 op0 = gen_lowpart (vmode, dest);
44389 while (vmode != V4SImode);
44391 memset (perm2, elt, 4);
44392 dest = gen_reg_rtx (V4SImode);
44393 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44394 gcc_assert (ok);
44395 if (!d->testing_p)
44396 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44397 return true;
44399 case V32QImode:
44400 case V16HImode:
44401 case V8SImode:
44402 case V4DImode:
44403 /* For AVX2, broadcasts of the first element should already have been
44404 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44405 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44406 return false;
44408 default:
44409 gcc_unreachable ();
44413 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44414 broadcast permutations. */
44416 static bool
44417 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44419 unsigned i, elt, nelt = d->nelt;
44421 if (!d->one_operand_p)
44422 return false;
44424 elt = d->perm[0];
44425 for (i = 1; i < nelt; ++i)
44426 if (d->perm[i] != elt)
44427 return false;
44429 return expand_vec_perm_broadcast_1 (d);
44432 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44433 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44434 all the shorter instruction sequences. */
44436 static bool
44437 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44439 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44440 unsigned int i, nelt, eltsz;
44441 bool used[4];
44443 if (!TARGET_AVX2
44444 || d->one_operand_p
44445 || (d->vmode != V32QImode && d->vmode != V16HImode))
44446 return false;
44448 if (d->testing_p)
44449 return true;
44451 nelt = d->nelt;
44452 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44454 /* Generate 4 permutation masks. If the required element is within
44455 the same lane, it is shuffled in. If the required element is from the
44456 other lane, force a zero by setting bit 7 in the permutation mask.
44457 The other mask has non-negative elements where the element is
44458 requested from the other lane; those elements are also placed into
44459 the other lane, so that the result of vpshufb can have its two
44460 V2TImode halves swapped. */
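 /* Concretely, the mask index computed below is
      0 - element taken from op0, same 128-bit lane,
      1 - element taken from op0, other lane,
      2 - element taken from op1, same lane,
      3 - element taken from op1, other lane,
    so rperm[1] and rperm[3] are the masks whose vpshufb results get
    their 128-bit lanes swapped by the vpermq further down.  */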
44461 m128 = GEN_INT (-128);
44462 for (i = 0; i < 32; ++i)
44464 rperm[0][i] = m128;
44465 rperm[1][i] = m128;
44466 rperm[2][i] = m128;
44467 rperm[3][i] = m128;
44469 used[0] = false;
44470 used[1] = false;
44471 used[2] = false;
44472 used[3] = false;
44473 for (i = 0; i < nelt; ++i)
44475 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44476 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44477 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44479 for (j = 0; j < eltsz; ++j)
44480 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44481 used[which] = true;
44484 for (i = 0; i < 2; ++i)
44486 if (!used[2 * i + 1])
44488 h[i] = NULL_RTX;
44489 continue;
44491 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44492 gen_rtvec_v (32, rperm[2 * i + 1]));
44493 vperm = force_reg (V32QImode, vperm);
44494 h[i] = gen_reg_rtx (V32QImode);
44495 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44496 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44499 /* Swap the 128-bit lanes of h[X]. */
44500 for (i = 0; i < 2; ++i)
44502 if (h[i] == NULL_RTX)
44503 continue;
44504 op = gen_reg_rtx (V4DImode);
44505 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44506 const2_rtx, GEN_INT (3), const0_rtx,
44507 const1_rtx));
44508 h[i] = gen_lowpart (V32QImode, op);
44511 for (i = 0; i < 2; ++i)
44513 if (!used[2 * i])
44515 l[i] = NULL_RTX;
44516 continue;
44518 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44519 vperm = force_reg (V32QImode, vperm);
44520 l[i] = gen_reg_rtx (V32QImode);
44521 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44522 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44525 for (i = 0; i < 2; ++i)
44527 if (h[i] && l[i])
44529 op = gen_reg_rtx (V32QImode);
44530 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44531 l[i] = op;
44533 else if (h[i])
44534 l[i] = h[i];
44537 gcc_assert (l[0] && l[1]);
44538 op = d->target;
44539 if (d->vmode != V32QImode)
44540 op = gen_reg_rtx (V32QImode);
44541 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44542 if (op != d->target)
44543 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44544 return true;
44547 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44548 With all of the interface bits taken care of, perform the expansion
44549 in D and return true on success. */
44551 static bool
44552 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44554 /* Try a single instruction expansion. */
44555 if (expand_vec_perm_1 (d))
44556 return true;
44558 /* Try sequences of two instructions. */
44560 if (expand_vec_perm_pshuflw_pshufhw (d))
44561 return true;
44563 if (expand_vec_perm_palignr (d))
44564 return true;
44566 if (expand_vec_perm_interleave2 (d))
44567 return true;
44569 if (expand_vec_perm_broadcast (d))
44570 return true;
44572 if (expand_vec_perm_vpermq_perm_1 (d))
44573 return true;
44575 if (expand_vec_perm_vperm2f128 (d))
44576 return true;
44578 /* Try sequences of three instructions. */
44580 if (expand_vec_perm_2vperm2f128_vshuf (d))
44581 return true;
44583 if (expand_vec_perm_pshufb2 (d))
44584 return true;
44586 if (expand_vec_perm_interleave3 (d))
44587 return true;
44589 if (expand_vec_perm_vperm2f128_vblend (d))
44590 return true;
44592 /* Try sequences of four instructions. */
44594 if (expand_vec_perm_vpshufb2_vpermq (d))
44595 return true;
44597 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44598 return true;
44600 /* ??? Look for narrow permutations whose element orderings would
44601 allow the promotion to a wider mode. */
44603 /* ??? Look for sequences of interleave or a wider permute that place
44604 the data into the correct lanes for a half-vector shuffle like
44605 pshuf[lh]w or vpermilps. */
44607 /* ??? Look for sequences of interleave that produce the desired results.
44608 The combinatorics of punpck[lh] get pretty ugly... */
44610 if (expand_vec_perm_even_odd (d))
44611 return true;
44613 /* Even longer sequences. */
44614 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44615 return true;
44617 return false;
44620 /* If a permutation only uses one operand, make it clear. Returns true
44621 if the permutation references both operands. */
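/* E.g. with nelt == 4, a selector of { 0, 1, 2, 3 } yields which == 1
   (only op0 used), { 4, 5, 6, 7 } yields which == 2 (only op1, folded
   onto op0 below), and { 0, 5, 2, 7 } yields which == 3.  */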
44623 static bool
44624 canonicalize_perm (struct expand_vec_perm_d *d)
44626 int i, which, nelt = d->nelt;
44628 for (i = which = 0; i < nelt; ++i)
44629 which |= (d->perm[i] < nelt ? 1 : 2);
44631 d->one_operand_p = true;
44632 switch (which)
44634 default:
44635 gcc_unreachable();
44637 case 3:
44638 if (!rtx_equal_p (d->op0, d->op1))
44640 d->one_operand_p = false;
44641 break;
44643 /* The elements of PERM do not suggest that only the first operand
44644 is used, but both operands are identical. Allow easier matching
44645 of the permutation by folding the permutation into the single
44646 input vector. */
44647 /* FALLTHRU */
44649 case 2:
44650 for (i = 0; i < nelt; ++i)
44651 d->perm[i] &= nelt - 1;
44652 d->op0 = d->op1;
44653 break;
44655 case 1:
44656 d->op1 = d->op0;
44657 break;
44660 return (which == 3);
44663 bool
44664 ix86_expand_vec_perm_const (rtx operands[4])
44666 struct expand_vec_perm_d d;
44667 unsigned char perm[MAX_VECT_LEN];
44668 int i, nelt;
44669 bool two_args;
44670 rtx sel;
44672 d.target = operands[0];
44673 d.op0 = operands[1];
44674 d.op1 = operands[2];
44675 sel = operands[3];
44677 d.vmode = GET_MODE (d.target);
44678 gcc_assert (VECTOR_MODE_P (d.vmode));
44679 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44680 d.testing_p = false;
44682 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44683 gcc_assert (XVECLEN (sel, 0) == nelt);
44684 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44686 for (i = 0; i < nelt; ++i)
44688 rtx e = XVECEXP (sel, 0, i);
44689 int ei = INTVAL (e) & (2 * nelt - 1);
44690 d.perm[i] = ei;
44691 perm[i] = ei;
44694 two_args = canonicalize_perm (&d);
44696 if (ix86_expand_vec_perm_const_1 (&d))
44697 return true;
44699 /* If the selector says both arguments are needed, but the operands are the
44700 same, the above tried to expand with one_operand_p and flattened selector.
44701 If that didn't work, retry without one_operand_p; that is the form
44702 that succeeded during testing. */
44703 if (two_args && d.one_operand_p)
44705 d.one_operand_p = false;
44706 memcpy (d.perm, perm, sizeof (perm));
44707 return ix86_expand_vec_perm_const_1 (&d);
44710 return false;
44713 /* Implement targetm.vectorize.vec_perm_const_ok. */
44715 static bool
44716 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44717 const unsigned char *sel)
44719 struct expand_vec_perm_d d;
44720 unsigned int i, nelt, which;
44721 bool ret;
44723 d.vmode = vmode;
44724 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44725 d.testing_p = true;
44727 /* Given sufficient ISA support we can just return true here
44728 for selected vector modes. */
44729 if (d.vmode == V16SImode || d.vmode == V16SFmode
44730 || d.vmode == V8DFmode || d.vmode == V8DImode)
44731 /* All implementable with a single vpermi2 insn. */
44732 return true;
44733 if (GET_MODE_SIZE (d.vmode) == 16)
44735 /* All implementable with a single vpperm insn. */
44736 if (TARGET_XOP)
44737 return true;
44738 /* All implementable with 2 pshufb + 1 ior. */
44739 if (TARGET_SSSE3)
44740 return true;
44741 /* All implementable with shufpd or unpck[lh]pd. */
44742 if (d.nelt == 2)
44743 return true;
44746 /* Extract the values from the vector CST into the permutation
44747 array in D. */
44748 memcpy (d.perm, sel, nelt);
44749 for (i = which = 0; i < nelt; ++i)
44751 unsigned char e = d.perm[i];
44752 gcc_assert (e < 2 * nelt);
44753 which |= (e < nelt ? 1 : 2);
44756 /* For all elements from the second vector, fold the elements to the first. */
44757 if (which == 2)
44758 for (i = 0; i < nelt; ++i)
44759 d.perm[i] -= nelt;
44761 /* Check whether the mask can be applied to the vector type. */
44762 d.one_operand_p = (which != 3);
44764 /* Implementable with shufps or pshufd. */
44765 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44766 return true;
44768 /* Otherwise we have to go through the motions and see if we can
44769 figure out how to generate the requested permutation. */
44770 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44771 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44772 if (!d.one_operand_p)
44773 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44775 start_sequence ();
44776 ret = ix86_expand_vec_perm_const_1 (&d);
44777 end_sequence ();
44779 return ret;
44782 void
44783 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44785 struct expand_vec_perm_d d;
44786 unsigned i, nelt;
44788 d.target = targ;
44789 d.op0 = op0;
44790 d.op1 = op1;
44791 d.vmode = GET_MODE (targ);
44792 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44793 d.one_operand_p = false;
44794 d.testing_p = false;
44796 for (i = 0; i < nelt; ++i)
44797 d.perm[i] = i * 2 + odd;
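 /* E.g. for V4SImode with odd == 1 this builds the selector { 1, 3, 5, 7 },
    i.e. the odd elements of the op0/op1 concatenation.  */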
44799 /* We'll either be able to implement the permutation directly... */
44800 if (expand_vec_perm_1 (&d))
44801 return;
44803 /* ... or we use the special-case patterns. */
44804 expand_vec_perm_even_odd_1 (&d, odd);
44807 static void
44808 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44810 struct expand_vec_perm_d d;
44811 unsigned i, nelt, base;
44812 bool ok;
44814 d.target = targ;
44815 d.op0 = op0;
44816 d.op1 = op1;
44817 d.vmode = GET_MODE (targ);
44818 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44819 d.one_operand_p = false;
44820 d.testing_p = false;
44822 base = high_p ? nelt / 2 : 0;
44823 for (i = 0; i < nelt / 2; ++i)
44825 d.perm[i * 2] = i + base;
44826 d.perm[i * 2 + 1] = i + base + nelt;
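 /* E.g. for V4SImode this builds { 0, 4, 1, 5 } (low) or { 2, 6, 3, 7 }
    (high), interleaving the selected halves of op0 and op1.  */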
44829 /* Note that for AVX this isn't one instruction. */
44830 ok = ix86_expand_vec_perm_const_1 (&d);
44831 gcc_assert (ok);
44835 /* Expand a vector operation CODE for a V*QImode in terms of the
44836 same operation on V*HImode. */
44838 void
44839 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44841 enum machine_mode qimode = GET_MODE (dest);
44842 enum machine_mode himode;
44843 rtx (*gen_il) (rtx, rtx, rtx);
44844 rtx (*gen_ih) (rtx, rtx, rtx);
44845 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44846 struct expand_vec_perm_d d;
44847 bool ok, full_interleave;
44848 bool uns_p = false;
44849 int i;
44851 switch (qimode)
44853 case V16QImode:
44854 himode = V8HImode;
44855 gen_il = gen_vec_interleave_lowv16qi;
44856 gen_ih = gen_vec_interleave_highv16qi;
44857 break;
44858 case V32QImode:
44859 himode = V16HImode;
44860 gen_il = gen_avx2_interleave_lowv32qi;
44861 gen_ih = gen_avx2_interleave_highv32qi;
44862 break;
44863 default:
44864 gcc_unreachable ();
44867 op2_l = op2_h = op2;
44868 switch (code)
44870 case MULT:
44871 /* Unpack data such that we've got a source byte in each low byte of
44872 each word. We don't care what goes into the high byte of each word.
44873 Rather than trying to get zero in there, it is most convenient to let
44874 it be a copy of the low byte. */
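 /* E.g. for V16QImode, punpcklbw of op2 with itself turns
    { b0 b1 ... b15 } into { b0 b0 b1 b1 ... b7 b7 }; read as V8HImode,
    each word's low byte is b_i, which is all the HImode multiply
    below needs.  */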
44875 op2_l = gen_reg_rtx (qimode);
44876 op2_h = gen_reg_rtx (qimode);
44877 emit_insn (gen_il (op2_l, op2, op2));
44878 emit_insn (gen_ih (op2_h, op2, op2));
44879 /* FALLTHRU */
44881 op1_l = gen_reg_rtx (qimode);
44882 op1_h = gen_reg_rtx (qimode);
44883 emit_insn (gen_il (op1_l, op1, op1));
44884 emit_insn (gen_ih (op1_h, op1, op1));
44885 full_interleave = qimode == V16QImode;
44886 break;
44888 case ASHIFT:
44889 case LSHIFTRT:
44890 uns_p = true;
44891 /* FALLTHRU */
44892 case ASHIFTRT:
44893 op1_l = gen_reg_rtx (himode);
44894 op1_h = gen_reg_rtx (himode);
44895 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44896 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44897 full_interleave = true;
44898 break;
44899 default:
44900 gcc_unreachable ();
44903 /* Perform the operation. */
44904 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44905 1, OPTAB_DIRECT);
44906 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44907 1, OPTAB_DIRECT);
44908 gcc_assert (res_l && res_h);
44910 /* Merge the data back into the right place. */
44911 d.target = dest;
44912 d.op0 = gen_lowpart (qimode, res_l);
44913 d.op1 = gen_lowpart (qimode, res_h);
44914 d.vmode = qimode;
44915 d.nelt = GET_MODE_NUNITS (qimode);
44916 d.one_operand_p = false;
44917 d.testing_p = false;
44919 if (full_interleave)
44921 /* For SSE2, we used a full interleave, so the desired
44922 results are in the even elements. */
44923 for (i = 0; i < 32; ++i)
44924 d.perm[i] = i * 2;
44926 else
44928 /* For AVX, the interleave used above was not cross-lane. So the
44929 extraction takes the even elements, but with the second and third
44930 quarters swapped. Happily, that is even one insn shorter than plain even extraction. */
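 /* E.g. for V32QImode the loop below builds the selector
    { 0, 2, ..., 14, 32, 34, ..., 46, 16, 18, ..., 30, 48, 50, ..., 62 },
    i.e. the even bytes of res_l's low lane, res_h's low lane, res_l's
    high lane and res_h's high lane, in that order.  */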
44931 for (i = 0; i < 32; ++i)
44932 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44935 ok = ix86_expand_vec_perm_const_1 (&d);
44936 gcc_assert (ok);
44938 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44939 gen_rtx_fmt_ee (code, qimode, op1, op2));
44942 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44943 if op is CONST_VECTOR with all odd elements equal to their
44944 preceding element. */
44946 static bool
44947 const_vector_equal_evenodd_p (rtx op)
44949 enum machine_mode mode = GET_MODE (op);
44950 int i, nunits = GET_MODE_NUNITS (mode);
44951 if (GET_CODE (op) != CONST_VECTOR
44952 || nunits != CONST_VECTOR_NUNITS (op))
44953 return false;
44954 for (i = 0; i < nunits; i += 2)
44955 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44956 return false;
44957 return true;
44960 void
44961 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44962 bool uns_p, bool odd_p)
44964 enum machine_mode mode = GET_MODE (op1);
44965 enum machine_mode wmode = GET_MODE (dest);
44966 rtx x;
44967 rtx orig_op1 = op1, orig_op2 = op2;
44969 if (!nonimmediate_operand (op1, mode))
44970 op1 = force_reg (mode, op1);
44971 if (!nonimmediate_operand (op2, mode))
44972 op2 = force_reg (mode, op2);
44974 /* We only play even/odd games with vectors of SImode. */
44975 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44977 /* If we're looking for the odd results, shift those members down to
44978 the even slots. For some CPUs this is faster than a PSHUFD. */
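 /* E.g. op1 = { A0, A1, A2, A3 } viewed as V2DImode is { A1:A0, A3:A2 };
    a logical right shift by 32 leaves { 0:A1, 0:A3 }, so the odd
    elements now sit in the even slots that the widen_*mult_even
    patterns read.  */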
44979 if (odd_p)
44981 /* For XOP use vpmacsdqh, but only for smult, as it is only
44982 signed. */
44983 if (TARGET_XOP && mode == V4SImode && !uns_p)
44985 x = force_reg (wmode, CONST0_RTX (wmode));
44986 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44987 return;
44990 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44991 if (!const_vector_equal_evenodd_p (orig_op1))
44992 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44993 x, NULL, 1, OPTAB_DIRECT);
44994 if (!const_vector_equal_evenodd_p (orig_op2))
44995 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44996 x, NULL, 1, OPTAB_DIRECT);
44997 op1 = gen_lowpart (mode, op1);
44998 op2 = gen_lowpart (mode, op2);
45001 if (mode == V16SImode)
45003 if (uns_p)
45004 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45005 else
45006 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45008 else if (mode == V8SImode)
45010 if (uns_p)
45011 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45012 else
45013 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45015 else if (uns_p)
45016 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45017 else if (TARGET_SSE4_1)
45018 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45019 else
45021 rtx s1, s2, t0, t1, t2;
45023 /* The easiest way to implement this without PMULDQ is to go through
45024 the motions as if we were performing a full 64-bit multiply, except
45025 that we need to do less shuffling of the elements. */
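 /* Why this works: with 32-bit elements the signed product equals the
    unsigned product minus (b << 32) when a is negative and minus
    (a << 32) when b is negative, all mod 2^64.  The sign masks s1/s2
    below are all-ones (0xffffffff) exactly for negative elements, so
    (s1*b + s2*a) << 32 wraps around to exactly minus that correction,
    and adding it to the unsigned low product gives the signed result.  */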
45027 /* Compute the sign-extension, aka highparts, of the two operands. */
45028 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45029 op1, pc_rtx, pc_rtx);
45030 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45031 op2, pc_rtx, pc_rtx);
45033 /* Multiply LO(A) * HI(B), and vice-versa. */
45034 t1 = gen_reg_rtx (wmode);
45035 t2 = gen_reg_rtx (wmode);
45036 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45037 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45039 /* Multiply LO(A) * LO(B). */
45040 t0 = gen_reg_rtx (wmode);
45041 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45043 /* Combine and shift the highparts into place. */
45044 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45045 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45046 1, OPTAB_DIRECT);
45048 /* Combine high and low parts. */
45049 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45050 return;
45052 emit_insn (x);
45055 void
45056 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45057 bool uns_p, bool high_p)
45059 enum machine_mode wmode = GET_MODE (dest);
45060 enum machine_mode mode = GET_MODE (op1);
45061 rtx t1, t2, t3, t4, mask;
45063 switch (mode)
45065 case V4SImode:
45066 t1 = gen_reg_rtx (mode);
45067 t2 = gen_reg_rtx (mode);
45068 if (TARGET_XOP && !uns_p)
45070 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45071 shuffle the elements once so that all elements are in the right
45072 place for immediate use: { A C B D }. */
45073 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45074 const1_rtx, GEN_INT (3)));
45075 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45076 const1_rtx, GEN_INT (3)));
45078 else
45080 /* Put the elements into place for the multiply. */
45081 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45082 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45083 high_p = false;
45085 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45086 break;
45088 case V8SImode:
45089 /* Shuffle the elements between the lanes. After this we
45090 have { A B E F | C D G H } for each operand. */
45091 t1 = gen_reg_rtx (V4DImode);
45092 t2 = gen_reg_rtx (V4DImode);
45093 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45094 const0_rtx, const2_rtx,
45095 const1_rtx, GEN_INT (3)));
45096 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45097 const0_rtx, const2_rtx,
45098 const1_rtx, GEN_INT (3)));
45100 /* Shuffle the elements within the lanes. After this we
45101 have { A A B B | C C D D } or { E E F F | G G H H }. */
45102 t3 = gen_reg_rtx (V8SImode);
45103 t4 = gen_reg_rtx (V8SImode);
45104 mask = GEN_INT (high_p
45105 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45106 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45107 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45108 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45110 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45111 break;
45113 case V8HImode:
45114 case V16HImode:
45115 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45116 uns_p, OPTAB_DIRECT);
45117 t2 = expand_binop (mode,
45118 uns_p ? umul_highpart_optab : smul_highpart_optab,
45119 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45120 gcc_assert (t1 && t2);
45122 t3 = gen_reg_rtx (mode);
45123 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45124 emit_move_insn (dest, gen_lowpart (wmode, t3));
45125 break;
45127 case V16QImode:
45128 case V32QImode:
45129 t1 = gen_reg_rtx (wmode);
45130 t2 = gen_reg_rtx (wmode);
45131 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45132 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45134 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45135 break;
45137 default:
45138 gcc_unreachable ();
45142 void
45143 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45145 rtx res_1, res_2, res_3, res_4;
45147 res_1 = gen_reg_rtx (V4SImode);
45148 res_2 = gen_reg_rtx (V4SImode);
45149 res_3 = gen_reg_rtx (V2DImode);
45150 res_4 = gen_reg_rtx (V2DImode);
45151 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45152 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45154 /* Move the results in element 2 down to element 1; we don't care
45155 what goes in elements 2 and 3. Then we can merge the parts
45156 back together with an interleave.
45158 Note that two other sequences were tried:
45159 (1) Use interleaves at the start instead of psrldq, which allows
45160 us to use a single shufps to merge things back at the end.
45161 (2) Use shufps here to combine the two vectors, then pshufd to
45162 put the elements in the correct order.
45163 In both cases the cost of the reformatting stall was too high
45164 and the overall sequence slower. */
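 /* Sketch of the data flow for op1 = { a0 a1 a2 a3 }, op2 = { b0 b1 b2 b3 }:
    res_3 = { a0*b0, a2*b2 } and res_4 = { a1*b1, a3*b3 } as V2DImode;
    the two pshufd insns below pull the low 32 bits of each product into
    elements 0 and 1, and the final punpckldq interleaves them back into
    { lo(a0*b0), lo(a1*b1), lo(a2*b2), lo(a3*b3) }.  */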
45166 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45167 const0_rtx, const2_rtx,
45168 const0_rtx, const0_rtx));
45169 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45170 const0_rtx, const2_rtx,
45171 const0_rtx, const0_rtx));
45172 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45174 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45177 void
45178 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45180 enum machine_mode mode = GET_MODE (op0);
45181 rtx t1, t2, t3, t4, t5, t6;
45183 if (TARGET_XOP && mode == V2DImode)
45185 /* op1: A,B,C,D, op2: E,F,G,H */
45186 op1 = gen_lowpart (V4SImode, op1);
45187 op2 = gen_lowpart (V4SImode, op2);
45189 t1 = gen_reg_rtx (V4SImode);
45190 t2 = gen_reg_rtx (V4SImode);
45191 t3 = gen_reg_rtx (V2DImode);
45192 t4 = gen_reg_rtx (V2DImode);
45194 /* t1: B,A,D,C */
45195 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45196 GEN_INT (1),
45197 GEN_INT (0),
45198 GEN_INT (3),
45199 GEN_INT (2)));
45201 /* t2: (B*E),(A*F),(D*G),(C*H) */
45202 emit_insn (gen_mulv4si3 (t2, t1, op2));
45204 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45205 emit_insn (gen_xop_phadddq (t3, t2));
45207 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45208 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45210 /* Multiply lower parts and add all */
45211 t5 = gen_reg_rtx (V2DImode);
45212 emit_insn (gen_vec_widen_umult_even_v4si (t5, gen_lowpart (V4SImode, op1), gen_lowpart (V4SImode, op2)));
45213 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45215 else
45217 enum machine_mode nmode;
45218 rtx (*umul) (rtx, rtx, rtx);
45220 if (mode == V2DImode)
45222 umul = gen_vec_widen_umult_even_v4si;
45223 nmode = V4SImode;
45225 else if (mode == V4DImode)
45227 umul = gen_vec_widen_umult_even_v8si;
45228 nmode = V8SImode;
45230 else if (mode == V8DImode)
45232 umul = gen_vec_widen_umult_even_v16si;
45233 nmode = V16SImode;
45235 else
45236 gcc_unreachable ();
45239 /* Multiply low parts. */
45240 t1 = gen_reg_rtx (mode);
45241 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45243 /* Shift input vectors right 32 bits so we can multiply high parts. */
45244 t6 = GEN_INT (32);
45245 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45246 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45248 /* Multiply high parts by low parts. */
45249 t4 = gen_reg_rtx (mode);
45250 t5 = gen_reg_rtx (mode);
45251 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45252 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45254 /* Combine and shift the highparts back. */
45255 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45256 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45258 /* Combine high and low parts. */
45259 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45262 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45263 gen_rtx_MULT (mode, op1, op2));
45266 /* Calculate integer abs() using only SSE2 instructions. */
45268 void
45269 ix86_expand_sse2_abs (rtx target, rtx input)
45271 enum machine_mode mode = GET_MODE (target);
45272 rtx tmp0, tmp1, x;
45274 switch (mode)
45276 /* For 32-bit signed integer X, the best way to calculate the absolute
45277 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
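 /* A scalar sketch of the same identity (illustrative only, not used by
    the compiler): m = x >> 31 is all-ones if x < 0 and zero otherwise,
    so (x ^ m) - m flips the bits and adds one exactly when x is
    negative. E.g. x = -5: m = -1, (-5 ^ -1) = 4, 4 - (-1) = 5.  */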
45278 case V4SImode:
45279 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45280 GEN_INT (GET_MODE_BITSIZE
45281 (GET_MODE_INNER (mode)) - 1),
45282 NULL, 0, OPTAB_DIRECT);
45283 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45284 NULL, 0, OPTAB_DIRECT);
45285 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45286 target, 0, OPTAB_DIRECT);
45287 break;
45289 /* For 16-bit signed integer X, the best way to calculate the absolute
45290 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45291 case V8HImode:
45292 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45294 x = expand_simple_binop (mode, SMAX, tmp0, input,
45295 target, 0, OPTAB_DIRECT);
45296 break;
45298 /* For 8-bit signed integer X, the best way to calculate the absolute
45299 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45300 as SSE2 provides the PMINUB insn. */
45301 case V16QImode:
45302 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45304 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45305 target, 0, OPTAB_DIRECT);
45306 break;
45308 default:
45309 gcc_unreachable ();
45312 if (x != target)
45313 emit_move_insn (target, x);
45316 /* Expand an insert into a vector register through pinsr insn.
45317 Return true if successful. */
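/* E.g. inserting a QImode value at bit position 24 of a V16QImode
   register becomes pinsrb with the selector GEN_INT (1 << (24 / 8)),
   i.e. byte 3, assuming SSE4.1 is available per the checks below.  */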
45319 bool
45320 ix86_expand_pinsr (rtx *operands)
45322 rtx dst = operands[0];
45323 rtx src = operands[3];
45325 unsigned int size = INTVAL (operands[1]);
45326 unsigned int pos = INTVAL (operands[2]);
45328 if (GET_CODE (dst) == SUBREG)
45330 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45331 dst = SUBREG_REG (dst);
45334 if (GET_CODE (src) == SUBREG)
45335 src = SUBREG_REG (src);
45337 switch (GET_MODE (dst))
45339 case V16QImode:
45340 case V8HImode:
45341 case V4SImode:
45342 case V2DImode:
45344 enum machine_mode srcmode, dstmode;
45345 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45347 srcmode = mode_for_size (size, MODE_INT, 0);
45349 switch (srcmode)
45351 case QImode:
45352 if (!TARGET_SSE4_1)
45353 return false;
45354 dstmode = V16QImode;
45355 pinsr = gen_sse4_1_pinsrb;
45356 break;
45358 case HImode:
45359 if (!TARGET_SSE2)
45360 return false;
45361 dstmode = V8HImode;
45362 pinsr = gen_sse2_pinsrw;
45363 break;
45365 case SImode:
45366 if (!TARGET_SSE4_1)
45367 return false;
45368 dstmode = V4SImode;
45369 pinsr = gen_sse4_1_pinsrd;
45370 break;
45372 case DImode:
45373 gcc_assert (TARGET_64BIT);
45374 if (!TARGET_SSE4_1)
45375 return false;
45376 dstmode = V2DImode;
45377 pinsr = gen_sse4_1_pinsrq;
45378 break;
45380 default:
45381 return false;
45384 rtx d = dst;
45385 if (GET_MODE (dst) != dstmode)
45386 d = gen_reg_rtx (dstmode);
45387 src = gen_lowpart (srcmode, src);
45389 pos /= size;
45391 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45392 GEN_INT (1 << pos)));
45393 if (d != dst)
45394 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45395 return true;
45398 default:
45399 return false;
45403 /* This function returns the calling-ABI-specific va_list type node,
45404 i.e. the va_list type appropriate for FNDECL. */
45406 static tree
45407 ix86_fn_abi_va_list (tree fndecl)
45409 if (!TARGET_64BIT)
45410 return va_list_type_node;
45411 gcc_assert (fndecl != NULL_TREE);
45413 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45414 return ms_va_list_type_node;
45415 else
45416 return sysv_va_list_type_node;
45419 /* Returns the canonical va_list type specified by TYPE. If there
45420 is no valid TYPE provided, it returns NULL_TREE. */
45422 static tree
45423 ix86_canonical_va_list_type (tree type)
45425 tree wtype, htype;
45427 /* Resolve references and pointers to va_list type. */
45428 if (TREE_CODE (type) == MEM_REF)
45429 type = TREE_TYPE (type);
45430 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45431 type = TREE_TYPE (type);
45432 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45433 type = TREE_TYPE (type);
45435 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45437 wtype = va_list_type_node;
45438 gcc_assert (wtype != NULL_TREE);
45439 htype = type;
45440 if (TREE_CODE (wtype) == ARRAY_TYPE)
45442 /* If va_list is an array type, the argument may have decayed
45443 to a pointer type, e.g. by being passed to another function.
45444 In that case, unwrap both types so that we can compare the
45445 underlying records. */
45446 if (TREE_CODE (htype) == ARRAY_TYPE
45447 || POINTER_TYPE_P (htype))
45449 wtype = TREE_TYPE (wtype);
45450 htype = TREE_TYPE (htype);
45453 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45454 return va_list_type_node;
45455 wtype = sysv_va_list_type_node;
45456 gcc_assert (wtype != NULL_TREE);
45457 htype = type;
45458 if (TREE_CODE (wtype) == ARRAY_TYPE)
45460 /* If va_list is an array type, the argument may have decayed
45461 to a pointer type, e.g. by being passed to another function.
45462 In that case, unwrap both types so that we can compare the
45463 underlying records. */
45464 if (TREE_CODE (htype) == ARRAY_TYPE
45465 || POINTER_TYPE_P (htype))
45467 wtype = TREE_TYPE (wtype);
45468 htype = TREE_TYPE (htype);
45471 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45472 return sysv_va_list_type_node;
45473 wtype = ms_va_list_type_node;
45474 gcc_assert (wtype != NULL_TREE);
45475 htype = type;
45476 if (TREE_CODE (wtype) == ARRAY_TYPE)
45478 /* If va_list is an array type, the argument may have decayed
45479 to a pointer type, e.g. by being passed to another function.
45480 In that case, unwrap both types so that we can compare the
45481 underlying records. */
45482 if (TREE_CODE (htype) == ARRAY_TYPE
45483 || POINTER_TYPE_P (htype))
45485 wtype = TREE_TYPE (wtype);
45486 htype = TREE_TYPE (htype);
45489 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45490 return ms_va_list_type_node;
45491 return NULL_TREE;
45493 return std_canonical_va_list_type (type);
45496 /* Iterate through the target-specific builtin types for va_list.
45497 IDX denotes the iterator, *PTREE is set to the result type of
45498 the va_list builtin, and *PNAME to its internal name.
45499 Returns zero if there is no element for this index, otherwise
45500 IDX should be increased upon the next call.
45501 Note, do not iterate a base builtin's name like __builtin_va_list.
45502 Used from c_common_nodes_and_builtins. */
45504 static int
45505 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45507 if (TARGET_64BIT)
45509 switch (idx)
45511 default:
45512 break;
45514 case 0:
45515 *ptree = ms_va_list_type_node;
45516 *pname = "__builtin_ms_va_list";
45517 return 1;
45519 case 1:
45520 *ptree = sysv_va_list_type_node;
45521 *pname = "__builtin_sysv_va_list";
45522 return 1;
45526 return 0;
45529 #undef TARGET_SCHED_DISPATCH
45530 #define TARGET_SCHED_DISPATCH has_dispatch
45531 #undef TARGET_SCHED_DISPATCH_DO
45532 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45533 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45534 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45535 #undef TARGET_SCHED_REORDER
45536 #define TARGET_SCHED_REORDER ix86_sched_reorder
45537 #undef TARGET_SCHED_ADJUST_PRIORITY
45538 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45539 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45540 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45541 ix86_dependencies_evaluation_hook
45543 /* The size of the dispatch window is the total number of bytes of
45544 object code allowed in a window. */
45545 #define DISPATCH_WINDOW_SIZE 16
45547 /* Number of dispatch windows considered for scheduling. */
45548 #define MAX_DISPATCH_WINDOWS 3
45550 /* Maximum number of instructions in a window. */
45551 #define MAX_INSN 4
45553 /* Maximum number of immediate operands in a window. */
45554 #define MAX_IMM 4
45556 /* Maximum number of immediate bits allowed in a window. */
45557 #define MAX_IMM_SIZE 128
45559 /* Maximum number of 32 bit immediates allowed in a window. */
45560 #define MAX_IMM_32 4
45562 /* Maximum number of 64 bit immediates allowed in a window. */
45563 #define MAX_IMM_64 2
45565 /* Maximum total of loads or prefetches allowed in a window. */
45566 #define MAX_LOAD 2
45568 /* Maximum total of stores allowed in a window. */
45569 #define MAX_STORE 1
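/* Taken together: one dispatch window holds at most MAX_INSN insns,
   MAX_LOAD loads, MAX_STORE store and MAX_IMM_SIZE bits of immediates,
   and the code below tracks up to MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE
   == 48 bytes of object code at a time (cf. the assertion in
   process_end_window). */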
45571 #undef BIG
45572 #define BIG 100
45575 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45576 enum dispatch_group {
45577 disp_no_group = 0,
45578 disp_load,
45579 disp_store,
45580 disp_load_store,
45581 disp_prefetch,
45582 disp_imm,
45583 disp_imm_32,
45584 disp_imm_64,
45585 disp_branch,
45586 disp_cmp,
45587 disp_jcc,
45588 disp_last
45591 /* Number of allowable groups in a dispatch window. It is an array
45592 indexed by the dispatch_group enum. 100 is used as a big number,
45593 because the number of these kinds of operations does not have any
45594 effect in the dispatch window, but we need entries for them in the
45595 table for other reasons. */
45596 static unsigned int num_allowable_groups[disp_last] = {
45597 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45600 char group_name[disp_last + 1][16] = {
45601 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45602 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45603 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45606 /* Instruction path. */
45607 enum insn_path {
45608 no_path = 0,
45609 path_single, /* Single micro op. */
45610 path_double, /* Double micro op. */
45611 path_multi, /* Instructions with more than 2 micro ops. */
45612 last_path
45615 /* sched_insn_info describes one instruction scheduled into a dispatch
45616 window of the basic block: the insn itself, its dispatch group and
45617 decode path, and its byte and immediate sizes.
45619 Windows are allocated for each basic block and are linked
45620 together. */
45621 typedef struct sched_insn_info_s {
45622 rtx insn;
45623 enum dispatch_group group;
45624 enum insn_path path;
45625 int byte_len;
45626 int imm_bytes;
45627 } sched_insn_info;
45629 /* Linked list of dispatch windows. This is a two-way list of
45630 dispatch windows of a basic block. It contains information about
45631 the number of uops in the window and the total number of
45632 instructions and of bytes in the object code for this dispatch
45633 window. */
45634 typedef struct dispatch_windows_s {
45635 int num_insn; /* Number of insn in the window. */
45636 int num_uops; /* Number of uops in the window. */
45637 int window_size; /* Number of bytes in the window. */
45638 int window_num; /* Window number, 0 or 1. */
45639 int num_imm; /* Number of immediates in an insn. */
45640 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45641 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45642 int imm_size; /* Total immediates in the window. */
45643 int num_loads; /* Total memory loads in the window. */
45644 int num_stores; /* Total memory stores in the window. */
45645 int violation; /* Violation exists in window. */
45646 sched_insn_info *window; /* Pointer to the window. */
45647 struct dispatch_windows_s *next;
45648 struct dispatch_windows_s *prev;
45649 } dispatch_windows;
45651 /* Immediate values used in an insn. */
45652 typedef struct imm_info_s
45654 int imm;
45655 int imm32;
45656 int imm64;
45657 } imm_info;
45659 static dispatch_windows *dispatch_window_list;
45660 static dispatch_windows *dispatch_window_list1;
45662 /* Get dispatch group of insn. */
45664 static enum dispatch_group
45665 get_mem_group (rtx insn)
45667 enum attr_memory memory;
45669 if (INSN_CODE (insn) < 0)
45670 return disp_no_group;
45671 memory = get_attr_memory (insn);
45672 if (memory == MEMORY_STORE)
45673 return disp_store;
45675 if (memory == MEMORY_LOAD)
45676 return disp_load;
45678 if (memory == MEMORY_BOTH)
45679 return disp_load_store;
45681 return disp_no_group;
45684 /* Return true if insn is a compare instruction. */
45686 static bool
45687 is_cmp (rtx insn)
45689 enum attr_type type;
45691 type = get_attr_type (insn);
45692 return (type == TYPE_TEST
45693 || type == TYPE_ICMP
45694 || type == TYPE_FCMP
45695 || GET_CODE (PATTERN (insn)) == COMPARE);
45698 /* Return true if a dispatch violation was encountered. */
45700 static bool
45701 dispatch_violation (void)
45703 if (dispatch_window_list->next)
45704 return dispatch_window_list->next->violation;
45705 return dispatch_window_list->violation;
45708 /* Return true if insn is a branch instruction. */
45710 static bool
45711 is_branch (rtx insn)
45713 return (CALL_P (insn) || JUMP_P (insn));
45716 /* Return true if insn is a prefetch instruction. */
45718 static bool
45719 is_prefetch (rtx insn)
45721 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45724 /* This function initializes a dispatch window and the list container holding a
45725 pointer to the window. */
45727 static void
45728 init_window (int window_num)
45730 int i;
45731 dispatch_windows *new_list;
45733 if (window_num == 0)
45734 new_list = dispatch_window_list;
45735 else
45736 new_list = dispatch_window_list1;
45738 new_list->num_insn = 0;
45739 new_list->num_uops = 0;
45740 new_list->window_size = 0;
45741 new_list->next = NULL;
45742 new_list->prev = NULL;
45743 new_list->window_num = window_num;
45744 new_list->num_imm = 0;
45745 new_list->num_imm_32 = 0;
45746 new_list->num_imm_64 = 0;
45747 new_list->imm_size = 0;
45748 new_list->num_loads = 0;
45749 new_list->num_stores = 0;
45750 new_list->violation = false;
45752 for (i = 0; i < MAX_INSN; i++)
45754 new_list->window[i].insn = NULL;
45755 new_list->window[i].group = disp_no_group;
45756 new_list->window[i].path = no_path;
45757 new_list->window[i].byte_len = 0;
45758 new_list->window[i].imm_bytes = 0;
45760 return;
45763 /* This function allocates and initializes a dispatch window and the
45764 list container holding a pointer to the window. */
45766 static dispatch_windows *
45767 allocate_window (void)
45769 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45770 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45772 return new_list;
45775 /* This routine initializes the dispatch scheduling information. It
45776 initiates building dispatch scheduler tables and constructs the
45777 first dispatch window. */
45779 static void
45780 init_dispatch_sched (void)
45782 /* Allocate a dispatch list and a window. */
45783 dispatch_window_list = allocate_window ();
45784 dispatch_window_list1 = allocate_window ();
45785 init_window (0);
45786 init_window (1);
45789 /* This function returns true if a branch is detected. End of a basic block
45790 does not have to be a branch, but here we assume only branches end a
45791 window. */
45793 static bool
45794 is_end_basic_block (enum dispatch_group group)
45796 return group == disp_branch;
45799 /* This function is called when the end of a window processing is reached. */
45801 static void
45802 process_end_window (void)
45804 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45805 if (dispatch_window_list->next)
45807 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45808 gcc_assert (dispatch_window_list->window_size
45809 + dispatch_window_list1->window_size <= 48);
45810 init_window (1);
45812 init_window (0);
45815 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45816 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45817 for 48 bytes of instructions. Note that these windows are not dispatch
45818 windows of size DISPATCH_WINDOW_SIZE. */
45820 static dispatch_windows *
45821 allocate_next_window (int window_num)
45823 if (window_num == 0)
45825 if (dispatch_window_list->next)
45826 init_window (1);
45827 init_window (0);
45828 return dispatch_window_list;
45831 dispatch_window_list->next = dispatch_window_list1;
45832 dispatch_window_list1->prev = dispatch_window_list;
45834 return dispatch_window_list1;
45837 /* Increment the number of immediate operands of an instruction. */
45839 static int
45840 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45842 if (*in_rtx == 0)
45843 return 0;
45845 switch ( GET_CODE (*in_rtx))
45847 case CONST:
45848 case SYMBOL_REF:
45849 case CONST_INT:
45850 (imm_values->imm)++;
45851 if (x86_64_immediate_operand (*in_rtx, SImode))
45852 (imm_values->imm32)++;
45853 else
45854 (imm_values->imm64)++;
45855 break;
45857 case CONST_DOUBLE:
45858 (imm_values->imm)++;
45859 (imm_values->imm64)++;
45860 break;
45862 case CODE_LABEL:
45863 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45865 (imm_values->imm)++;
45866 (imm_values->imm32)++;
45868 break;
45870 default:
45871 break;
45874 return 0;
45877 /* Compute number of immediate operands of an instruction. */
45879 static void
45880 find_constant (rtx in_rtx, imm_info *imm_values)
45882 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45883 (rtx_function) find_constant_1, (void *) imm_values);
45886 /* Return the total size of the immediate operands of an instruction along
45887 with the number of corresponding immediate operands. It initializes its
45888 counters to zero before calling FIND_CONSTANT.
45889 INSN is the input instruction. IMM is the total of immediates.
45890 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45891 bit immediates. */
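/* E.g. an insn with one 32-bit and one 64-bit immediate comes back with
   *IMM == 2, *IMM32 == 1, *IMM64 == 1 and a return value of
   1*4 + 1*8 == 12 bytes. */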
45893 static int
45894 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45896 imm_info imm_values = {0, 0, 0};
45898 find_constant (insn, &imm_values);
45899 *imm = imm_values.imm;
45900 *imm32 = imm_values.imm32;
45901 *imm64 = imm_values.imm64;
45902 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45905 /* This function indicates whether an instruction has any immediate
45906 operands. */
45908 static bool
45909 has_immediate (rtx insn)
45911 int num_imm_operand;
45912 int num_imm32_operand;
45913 int num_imm64_operand;
45915 if (insn)
45916 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45917 &num_imm64_operand);
45918 return false;
45921 /* Return single or double path for instructions. */
45923 static enum insn_path
45924 get_insn_path (rtx insn)
45926 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45928 if ((int)path == 0)
45929 return path_single;
45931 if ((int)path == 1)
45932 return path_double;
45934 return path_multi;
45937 /* Return insn dispatch group. */
45939 static enum dispatch_group
45940 get_insn_group (rtx insn)
45942 enum dispatch_group group = get_mem_group (insn);
45943 if (group)
45944 return group;
45946 if (is_branch (insn))
45947 return disp_branch;
45949 if (is_cmp (insn))
45950 return disp_cmp;
45952 if (has_immediate (insn))
45953 return disp_imm;
45955 if (is_prefetch (insn))
45956 return disp_prefetch;
45958 return disp_no_group;
45961 /* Count number of GROUP restricted instructions in a dispatch
45962 window WINDOW_LIST. */
45964 static int
45965 count_num_restricted (rtx insn, dispatch_windows *window_list)
45967 enum dispatch_group group = get_insn_group (insn);
45968 int imm_size;
45969 int num_imm_operand;
45970 int num_imm32_operand;
45971 int num_imm64_operand;
45973 if (group == disp_no_group)
45974 return 0;
45976 if (group == disp_imm)
45978 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45979 &num_imm64_operand);
45980 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45981 || num_imm_operand + window_list->num_imm > MAX_IMM
45982 || (num_imm32_operand > 0
45983 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45984 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45985 || (num_imm64_operand > 0
45986 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45987 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45988 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45989 && num_imm64_operand > 0
45990 && ((window_list->num_imm_64 > 0
45991 && window_list->num_insn >= 2)
45992 || window_list->num_insn >= 3)))
45993 return BIG;
45995 return 1;
45998 if ((group == disp_load_store
45999 && (window_list->num_loads >= MAX_LOAD
46000 || window_list->num_stores >= MAX_STORE))
46001 || ((group == disp_load
46002 || group == disp_prefetch)
46003 && window_list->num_loads >= MAX_LOAD)
46004 || (group == disp_store
46005 && window_list->num_stores >= MAX_STORE))
46006 return BIG;
46008 return 1;
46011 /* This function returns true if insn satisfies dispatch rules on the
46012 last window scheduled. */
46014 static bool
46015 fits_dispatch_window (rtx insn)
46017 dispatch_windows *window_list = dispatch_window_list;
46018 dispatch_windows *window_list_next = dispatch_window_list->next;
46019 unsigned int num_restrict;
46020 enum dispatch_group group = get_insn_group (insn);
46021 enum insn_path path = get_insn_path (insn);
46022 int sum;
46024 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46025 instructions should be given the lowest priority in the
46026 scheduling process in the Haifa scheduler to make sure they will be
46027 scheduled in the same dispatch window as the reference to them. */
46028 if (group == disp_jcc || group == disp_cmp)
46029 return false;
46031 /* Check nonrestricted. */
46032 if (group == disp_no_group || group == disp_branch)
46033 return true;
46035 /* Get last dispatch window. */
46036 if (window_list_next)
46037 window_list = window_list_next;
46039 if (window_list->window_num == 1)
46041 sum = window_list->prev->window_size + window_list->window_size;
46043 if (sum == 32
46044 || (min_insn_size (insn) + sum) >= 48)
46045 /* Window 1 is full. Go for next window. */
46046 return true;
46049 num_restrict = count_num_restricted (insn, window_list);
46051 if (num_restrict > num_allowable_groups[group])
46052 return false;
46054 /* See if it fits in the first window. */
46055 if (window_list->window_num == 0)
46057       /* The first window should have only single and double path
46058          uops. */
46059 if (path == path_double
46060 && (window_list->num_uops + 2) > MAX_INSN)
46061 return false;
46062 else if (path != path_single)
46063 return false;
46065 return true;
46068 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46069 dispatch window WINDOW_LIST. */
46071 static void
46072 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46074 int byte_len = min_insn_size (insn);
46075 int num_insn = window_list->num_insn;
46076 int imm_size;
46077 sched_insn_info *window = window_list->window;
46078 enum dispatch_group group = get_insn_group (insn);
46079 enum insn_path path = get_insn_path (insn);
46080 int num_imm_operand;
46081 int num_imm32_operand;
46082 int num_imm64_operand;
46084 if (!window_list->violation && group != disp_cmp
46085 && !fits_dispatch_window (insn))
46086 window_list->violation = true;
46088 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46089 &num_imm64_operand);
46091 /* Initialize window with new instruction. */
46092 window[num_insn].insn = insn;
46093 window[num_insn].byte_len = byte_len;
46094 window[num_insn].group = group;
46095 window[num_insn].path = path;
46096 window[num_insn].imm_bytes = imm_size;
46098 window_list->window_size += byte_len;
46099 window_list->num_insn = num_insn + 1;
46100 window_list->num_uops = window_list->num_uops + num_uops;
46101 window_list->imm_size += imm_size;
46102 window_list->num_imm += num_imm_operand;
46103 window_list->num_imm_32 += num_imm32_operand;
46104 window_list->num_imm_64 += num_imm64_operand;
46106 if (group == disp_store)
46107 window_list->num_stores += 1;
46108 else if (group == disp_load
46109 || group == disp_prefetch)
46110 window_list->num_loads += 1;
46111 else if (group == disp_load_store)
46113 window_list->num_stores += 1;
46114 window_list->num_loads += 1;
46118 /* Add a scheduled instruction, INSN, to the current dispatch window.
46119    If the total bytes of instructions or the number of instructions
46120    in the window would exceed the allowed maximum, allocate a new window. */
46122 static void
46123 add_to_dispatch_window (rtx insn)
46125 int byte_len;
46126 dispatch_windows *window_list;
46127 dispatch_windows *next_list;
46128 dispatch_windows *window0_list;
46129 enum insn_path path;
46130 enum dispatch_group insn_group;
46131 bool insn_fits;
46132 int num_insn;
46133 int num_uops;
46134 int window_num;
46135 int insn_num_uops;
46136 int sum;
46138 if (INSN_CODE (insn) < 0)
46139 return;
46141 byte_len = min_insn_size (insn);
46142 window_list = dispatch_window_list;
46143 next_list = window_list->next;
46144 path = get_insn_path (insn);
46145 insn_group = get_insn_group (insn);
46147 /* Get the last dispatch window. */
46148 if (next_list)
46149 window_list = dispatch_window_list->next;
46151 if (path == path_single)
46152 insn_num_uops = 1;
46153 else if (path == path_double)
46154 insn_num_uops = 2;
46155 else
46156 insn_num_uops = (int) path;
46158   /* If the current window is full, get a new window.
46159      Window number zero is full if MAX_INSN uops are scheduled in it.
46160      Window number one is full if window zero's bytes plus window
46161      one's bytes equal 32, if adding the bytes of the new instruction
46162      would bring that total to 48 or more, or if it already holds
46163      MAX_INSN instructions. */
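  /* A worked example of the rule above, with illustrative values only:
     if window zero holds 20 bytes and window one holds 12 bytes, their
     sum is 32 and window one is full; if the sum were instead 30 and
     the incoming instruction 18 bytes, 30 + 18 >= 48 would likewise
     end the current pair of windows.  */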
46164 num_insn = window_list->num_insn;
46165 num_uops = window_list->num_uops;
46166 window_num = window_list->window_num;
46167 insn_fits = fits_dispatch_window (insn);
46169 if (num_insn >= MAX_INSN
46170 || num_uops + insn_num_uops > MAX_INSN
46171 || !(insn_fits))
46173 window_num = ~window_num & 1;
46174 window_list = allocate_next_window (window_num);
46177 if (window_num == 0)
46179 add_insn_window (insn, window_list, insn_num_uops);
46180 if (window_list->num_insn >= MAX_INSN
46181 && insn_group == disp_branch)
46183 process_end_window ();
46184 return;
46187 else if (window_num == 1)
46189 window0_list = window_list->prev;
46190 sum = window0_list->window_size + window_list->window_size;
46191 if (sum == 32
46192 || (byte_len + sum) >= 48)
46194 process_end_window ();
46195 window_list = dispatch_window_list;
46198 add_insn_window (insn, window_list, insn_num_uops);
46200 else
46201 gcc_unreachable ();
46203 if (is_end_basic_block (insn_group))
46205       /* End of basic block reached; do end-basic-block processing. */
46206 process_end_window ();
46207 return;
46211 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46213 DEBUG_FUNCTION static void
46214 debug_dispatch_window_file (FILE *file, int window_num)
46216 dispatch_windows *list;
46217 int i;
46219 if (window_num == 0)
46220 list = dispatch_window_list;
46221 else
46222 list = dispatch_window_list1;
46224 fprintf (file, "Window #%d:\n", list->window_num);
46225 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46226 list->num_insn, list->num_uops, list->window_size);
46227 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46228 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46230 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46231 list->num_stores);
46232 fprintf (file, " insn info:\n");
46234 for (i = 0; i < MAX_INSN; i++)
46236 if (!list->window[i].insn)
46237 break;
46238 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46239 i, group_name[list->window[i].group],
46240 i, (void *)list->window[i].insn,
46241 i, list->window[i].path,
46242 i, list->window[i].byte_len,
46243 i, list->window[i].imm_bytes);
46247 /* Print to stdout a dispatch window. */
46249 DEBUG_FUNCTION void
46250 debug_dispatch_window (int window_num)
46252 debug_dispatch_window_file (stdout, window_num);
46255 /* Print INSN dispatch information to FILE. */
46257 DEBUG_FUNCTION static void
46258 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46260 int byte_len;
46261 enum insn_path path;
46262 enum dispatch_group group;
46263 int imm_size;
46264 int num_imm_operand;
46265 int num_imm32_operand;
46266 int num_imm64_operand;
46268 if (INSN_CODE (insn) < 0)
46269 return;
46271 byte_len = min_insn_size (insn);
46272 path = get_insn_path (insn);
46273 group = get_insn_group (insn);
46274 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46275 &num_imm64_operand);
46277 fprintf (file, " insn info:\n");
46278 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46279 group_name[group], path, byte_len);
46280 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46281 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46284 /* Print to stdout the status of the ready list with respect to
46285    dispatch windows. */
46287 DEBUG_FUNCTION void
46288 debug_ready_dispatch (void)
46290 int i;
46291 int no_ready = number_in_ready ();
46293 fprintf (stdout, "Number of ready: %d\n", no_ready);
46295 for (i = 0; i < no_ready; i++)
46296 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46299 /* This routine is the driver of the dispatch scheduler. */
46301 static void
46302 do_dispatch (rtx insn, int mode)
46304 if (mode == DISPATCH_INIT)
46305 init_dispatch_sched ();
46306 else if (mode == ADD_TO_DISPATCH_WINDOW)
46307 add_to_dispatch_window (insn);
46310 /* Return TRUE if Dispatch Scheduling is supported. */
46312 static bool
46313 has_dispatch (rtx insn, int action)
46315 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46316 && flag_dispatch_scheduler)
46317 switch (action)
46319 default:
46320 return false;
46322 case IS_DISPATCH_ON:
46323 return true;
46324 break;
46326 case IS_CMP:
46327 return is_cmp (insn);
46329 case DISPATCH_VIOLATION:
46330 return dispatch_violation ();
46332 case FITS_DISPATCH_WINDOW:
46333 return fits_dispatch_window (insn);
46336 return false;
46339 /* Implementation of the reassociation_width target hook, used by
46340    the reassoc phase to identify the level of parallelism in a
46341    reassociated tree.  The statement's tree code is passed in OPC
46342    and the type of its arguments in MODE.
46344    Currently parallel reassociation is enabled only for Atom
46345    processors, and we set the reassociation width to 2 because
46346    Atom may issue up to 2 instructions per cycle.
46348    The return value should be revisited if parallel reassociation
46349    is enabled for other processors. */
46351 static int
46352 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46353 enum machine_mode mode)
46355 int res = 1;
46357 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46358 res = 2;
46359 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46360 res = 2;
46362 return res;
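/* As an illustration of the width returned above (not code used by
   this file): with a reassociation width of 2 the reassoc pass may
   rewrite the chain

     t = ((a + b) + c) + d;

   into the shallower tree

     t = (a + b) + (c + d);

   so that the two inner additions can issue in parallel on a 2-wide
   core such as Atom.  */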
46365 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46366 place emms and femms instructions. */
46368 static enum machine_mode
46369 ix86_preferred_simd_mode (enum machine_mode mode)
46371 if (!TARGET_SSE)
46372 return word_mode;
46374 switch (mode)
46376 case QImode:
46377 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46378 case HImode:
46379 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46380 case SImode:
46381 return TARGET_AVX512F ? V16SImode :
46382 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46383 case DImode:
46384 return TARGET_AVX512F ? V8DImode :
46385 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46387 case SFmode:
46388 if (TARGET_AVX512F)
46389 return V16SFmode;
46390 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46391 return V8SFmode;
46392 else
46393 return V4SFmode;
46395 case DFmode:
46396 if (!TARGET_VECTORIZE_DOUBLE)
46397 return word_mode;
46398 else if (TARGET_AVX512F)
46399 return V8DFmode;
46400 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46401 return V4DFmode;
46402 else if (TARGET_SSE2)
46403 return V2DFmode;
46404 /* FALLTHRU */
46406 default:
46407 return word_mode;
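/* For example (derived from the switch above): with -mavx2 and the
   256-bit preference left enabled, SImode data vectorizes in V8SImode
   and SFmode data in V8SFmode, while plain -msse2 yields V4SImode and
   V4SFmode respectively.  */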
46411 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46412 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46413 256bit and 128bit vectors. */
46415 static unsigned int
46416 ix86_autovectorize_vector_sizes (void)
46418 return TARGET_AVX512F ? 64 | 32 | 16 :
46419 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
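/* The return value above is a bit mask of candidate vector sizes in
   bytes; e.g. with AVX512F it is 64 | 32 | 16 == 112, so the
   vectorizer may try 512-bit, 256-bit and 128-bit vectors, whereas a
   return of 0 means only the preferred SIMD mode is tried.  */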
46424 /* Return class of registers which could be used for pseudo of MODE
46425 and of class RCLASS for spilling instead of memory. Return NO_REGS
46426 if it is not possible or non-profitable. */
46427 static reg_class_t
46428 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46430 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46431 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46432 && INTEGER_CLASS_P (rclass))
46433 return ALL_SSE_REGS;
46434 return NO_REGS;
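/* With TARGET_GENERAL_REGS_SSE_SPILL this allows the allocator to
   spill SImode (and, for 64-bit, DImode) pseudos into SSE registers,
   using register-register moves such as "movd %eax, %xmm0", instead
   of spilling to the stack.  (Explanatory note; illustrative only.)  */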
46437 /* Implement targetm.vectorize.init_cost. */
46439 static void *
46440 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46442 unsigned *cost = XNEWVEC (unsigned, 3);
46443 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46444 return cost;
46447 /* Implement targetm.vectorize.add_stmt_cost. */
46449 static unsigned
46450 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46451 struct _stmt_vec_info *stmt_info, int misalign,
46452 enum vect_cost_model_location where)
46454 unsigned *cost = (unsigned *) data;
46455 unsigned retval = 0;
46457 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46458 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46460 /* Statements in an inner loop relative to the loop being
46461 vectorized are weighted more heavily. The value here is
46462 arbitrary and could potentially be improved with analysis. */
46463 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46464 count *= 50; /* FIXME. */
46466 retval = (unsigned) (count * stmt_cost);
46467 cost[where] += retval;
46469 return retval;
46472 /* Implement targetm.vectorize.finish_cost. */
46474 static void
46475 ix86_finish_cost (void *data, unsigned *prologue_cost,
46476 unsigned *body_cost, unsigned *epilogue_cost)
46478 unsigned *cost = (unsigned *) data;
46479 *prologue_cost = cost[vect_prologue];
46480 *body_cost = cost[vect_body];
46481 *epilogue_cost = cost[vect_epilogue];
46484 /* Implement targetm.vectorize.destroy_cost_data. */
46486 static void
46487 ix86_destroy_cost_data (void *data)
46489 free (data);
46492 /* Validate target specific memory model bits in VAL. */
46494 static unsigned HOST_WIDE_INT
46495 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46497 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46498 bool strong;
46500 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46501 |MEMMODEL_MASK)
46502 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46504 warning (OPT_Winvalid_memory_model,
46505 "Unknown architecture specific memory model");
46506 return MEMMODEL_SEQ_CST;
46508 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46509 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46511 warning (OPT_Winvalid_memory_model,
46512 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46513 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46515 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46517 warning (OPT_Winvalid_memory_model,
46518 "HLE_RELEASE not used with RELEASE or stronger memory model");
46519 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46521 return val;
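/* As a usage illustration (not part of this file's logic), the HLE
   bits validated above are the ones a user may OR into the memory
   model argument of the __atomic builtins when HLE is enabled, e.g.:

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0,
                       __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a model weaker than ACQUIRE, or
   HLE_RELEASE with a model weaker than RELEASE, triggers the warnings
   above.  */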
46524 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46525 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46526 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46527 or number of vecsize_mangle variants that should be emitted. */
46529 static int
46530 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46531 struct cgraph_simd_clone *clonei,
46532 tree base_type, int num)
46534 int ret = 1;
46536 if (clonei->simdlen
46537 && (clonei->simdlen < 2
46538 || clonei->simdlen > 16
46539 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46541 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46542 "unsupported simdlen %d", clonei->simdlen);
46543 return 0;
46546 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46547 if (TREE_CODE (ret_type) != VOID_TYPE)
46548 switch (TYPE_MODE (ret_type))
46550 case QImode:
46551 case HImode:
46552 case SImode:
46553 case DImode:
46554 case SFmode:
46555 case DFmode:
46556 /* case SCmode: */
46557 /* case DCmode: */
46558 break;
46559 default:
46560 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46561 "unsupported return type %qT for simd\n", ret_type);
46562 return 0;
46565 tree t;
46566 int i;
46568 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46569 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46570 switch (TYPE_MODE (TREE_TYPE (t)))
46572 case QImode:
46573 case HImode:
46574 case SImode:
46575 case DImode:
46576 case SFmode:
46577 case DFmode:
46578 /* case SCmode: */
46579 /* case DCmode: */
46580 break;
46581 default:
46582 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46583 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46584 return 0;
46587 if (clonei->cilk_elemental)
46589       /* Parse the processor clause here.  If not present, default to 'b'. */
46590 clonei->vecsize_mangle = 'b';
46592 else if (!TREE_PUBLIC (node->decl))
46594 /* If the function isn't exported, we can pick up just one ISA
46595 for the clones. */
46596 if (TARGET_AVX2)
46597 clonei->vecsize_mangle = 'd';
46598 else if (TARGET_AVX)
46599 clonei->vecsize_mangle = 'c';
46600 else
46601 clonei->vecsize_mangle = 'b';
46602 ret = 1;
46604 else
46606 clonei->vecsize_mangle = "bcd"[num];
46607 ret = 3;
46609 switch (clonei->vecsize_mangle)
46611 case 'b':
46612 clonei->vecsize_int = 128;
46613 clonei->vecsize_float = 128;
46614 break;
46615 case 'c':
46616 clonei->vecsize_int = 128;
46617 clonei->vecsize_float = 256;
46618 break;
46619 case 'd':
46620 clonei->vecsize_int = 256;
46621 clonei->vecsize_float = 256;
46622 break;
46624 if (clonei->simdlen == 0)
46626 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46627 clonei->simdlen = clonei->vecsize_int;
46628 else
46629 clonei->simdlen = clonei->vecsize_float;
46630 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46631 if (clonei->simdlen > 16)
46632 clonei->simdlen = 16;
46634 return ret;
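/* For example (illustrative only): for an exported function such as

     #pragma omp declare simd
     double f (double x);

   the hook above asks for three clones, with vecsize_mangle 'b', 'c'
   and 'd' (the SSE2, AVX and AVX2 variants configured below), while a
   function with internal linkage gets a single clone matching the
   current ISA.  */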
46637 /* Add target attribute to SIMD clone NODE if needed. */
46639 static void
46640 ix86_simd_clone_adjust (struct cgraph_node *node)
46642 const char *str = NULL;
46643 gcc_assert (node->decl == cfun->decl);
46644 switch (node->simdclone->vecsize_mangle)
46646 case 'b':
46647 if (!TARGET_SSE2)
46648 str = "sse2";
46649 break;
46650 case 'c':
46651 if (!TARGET_AVX)
46652 str = "avx";
46653 break;
46654 case 'd':
46655 if (!TARGET_AVX2)
46656 str = "avx2";
46657 break;
46658 default:
46659 gcc_unreachable ();
46661 if (str == NULL)
46662 return;
46663 push_cfun (NULL);
46664 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46665 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46666 gcc_assert (ok);
46667 pop_cfun ();
46668 ix86_previous_fndecl = NULL_TREE;
46669 ix86_set_current_function (node->decl);
46672 /* If SIMD clone NODE can't be used in a vectorized loop in the
46673    current function, return -1, otherwise return the badness of
46674    using it (0 if it is the most desirable from the vecsize_mangle
46675    point of view, 1 slightly less desirable, etc.). */
46677 static int
46678 ix86_simd_clone_usable (struct cgraph_node *node)
46680 switch (node->simdclone->vecsize_mangle)
46682 case 'b':
46683 if (!TARGET_SSE2)
46684 return -1;
46685 if (!TARGET_AVX)
46686 return 0;
46687 return TARGET_AVX2 ? 2 : 1;
46688 case 'c':
46689 if (!TARGET_AVX)
46690 return -1;
46691 return TARGET_AVX2 ? 1 : 0;
46692 break;
46693 case 'd':
46694 if (!TARGET_AVX2)
46695 return -1;
46696 return 0;
46697 default:
46698 gcc_unreachable ();
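/* For instance (derived from the switch above): on an AVX2 target all
   three clones are usable and the 'd' (AVX2) clone is preferred with
   badness 0, 'c' gets 1 and 'b' gets 2; on a plain SSE2 target only
   the 'b' clone is usable.  */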
46702 /* Count the memory references in *X, accumulating the result in
46703    *MEM_COUNT.  This value determines the unrolling factor for
46704    the bdver3 and bdver4 architectures. */
46706 static int
46707 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46709 if (*x != NULL_RTX && MEM_P (*x))
46711 enum machine_mode mode;
46712 unsigned int n_words;
46714 mode = GET_MODE (*x);
46715 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46717 if (n_words > 4)
46718 (*mem_count)+=2;
46719 else
46720 (*mem_count)+=1;
46722 return 0;
46725 /* Adjust the unroll factor based on hardware capabilities.  For
46726    example, bdver3 has a loop buffer which makes unrolling of
46727    smaller loops less important.  This function chooses the
46728    unroll factor using the number of memory references in the
46729    loop body as a heuristic, with 32 serving as the budget
46730    (the factor is 32 divided by the reference count). */
46732 static unsigned
46733 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46735 basic_block *bbs;
46736 rtx insn;
46737 unsigned i;
46738 unsigned mem_count = 0;
46740 if (!TARGET_ADJUST_UNROLL)
46741 return nunroll;
46743 /* Count the number of memory references within the loop body. */
46744 bbs = get_loop_body (loop);
46745 for (i = 0; i < loop->num_nodes; i++)
46747 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46748 if (NONDEBUG_INSN_P (insn))
46749 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46751 free (bbs);
46753   if (mem_count && mem_count <= 32)
46754     return 32 / mem_count;
46756 return nunroll;
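/* Worked example of the heuristic above (illustrative values): a loop
   body with 8 counted memory references gets an unroll factor of
   32 / 8 = 4, while a body with more than 32 references (or none)
   keeps the factor NUNROLL chosen by the generic unroller.  */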
46760 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46762 static bool
46763 ix86_float_exceptions_rounding_supported_p (void)
46765 /* For x87 floating point with standard excess precision handling,
46766 there is no adddf3 pattern (since x87 floating point only has
46767 XFmode operations) so the default hook implementation gets this
46768 wrong. */
46769 return TARGET_80387 || TARGET_SSE_MATH;
46772 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46774 static void
46775 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46777 if (!TARGET_80387 && !TARGET_SSE_MATH)
46778 return;
46779 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46780 if (TARGET_80387)
46782 tree fenv_index_type = build_index_type (size_int (6));
46783 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46784 tree fenv_var = create_tmp_var (fenv_type, NULL);
46785 mark_addressable (fenv_var);
46786 tree fenv_ptr = build_pointer_type (fenv_type);
46787 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46788 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46789 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46790 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46791 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46792 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46793 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46794 tree hold_fnclex = build_call_expr (fnclex, 0);
46795 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46796 hold_fnclex);
46797 *clear = build_call_expr (fnclex, 0);
46798 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46799 mark_addressable (sw_var);
46800 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46801 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46802 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46803 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46804 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46805 exceptions_var, exceptions_x87);
46806 *update = build2 (COMPOUND_EXPR, integer_type_node,
46807 fnstsw_call, update_mod);
46808 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46809 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46811 if (TARGET_SSE_MATH)
46813 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46814 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46815 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46816 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46817 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46818 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46819 mxcsr_orig_var, stmxcsr_hold_call);
46820 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46821 mxcsr_orig_var,
46822 build_int_cst (unsigned_type_node, 0x1f80));
46823 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46824 build_int_cst (unsigned_type_node, 0xffffffc0));
46825 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46826 mxcsr_mod_var, hold_mod_val);
46827 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46828 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46829 hold_assign_orig, hold_assign_mod);
46830 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46831 ldmxcsr_hold_call);
46832 if (*hold)
46833 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46834 else
46835 *hold = hold_all;
46836 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46837 if (*clear)
46838 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46839 ldmxcsr_clear_call);
46840 else
46841 *clear = ldmxcsr_clear_call;
46842 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46843 tree exceptions_sse = fold_convert (integer_type_node,
46844 stxmcsr_update_call);
46845 if (*update)
46847 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46848 exceptions_var, exceptions_sse);
46849 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46850 exceptions_var, exceptions_mod);
46851 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46852 exceptions_assign);
46854 else
46855 *update = build2 (MODIFY_EXPR, integer_type_node,
46856 exceptions_var, exceptions_sse);
46857 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46858 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46859 ldmxcsr_update_call);
46861 tree atomic_feraiseexcept
46862 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46863 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46864 1, exceptions_var);
46865 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46866 atomic_feraiseexcept_call);
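/* As an illustration (not code used here), the hold/clear/update trees
   built above are wrapped by the middle end around the compare-and-swap
   retry loop when it expands a C11 atomic compound assignment on a
   floating-point object, e.g.

     _Atomic double d;
     ...
     d += 1.0;

   The FP environment is saved and exceptions cleared before the loop,
   cleared again whenever the compare-and-swap has to retry, and the
   accumulated exceptions are re-raised exactly once through
   __atomic_feraiseexcept after the store succeeds.  */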
46869 /* Initialize the GCC target structure. */
46870 #undef TARGET_RETURN_IN_MEMORY
46871 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46873 #undef TARGET_LEGITIMIZE_ADDRESS
46874 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46876 #undef TARGET_ATTRIBUTE_TABLE
46877 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46878 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46879 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46880 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46881 # undef TARGET_MERGE_DECL_ATTRIBUTES
46882 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46883 #endif
46885 #undef TARGET_COMP_TYPE_ATTRIBUTES
46886 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46888 #undef TARGET_INIT_BUILTINS
46889 #define TARGET_INIT_BUILTINS ix86_init_builtins
46890 #undef TARGET_BUILTIN_DECL
46891 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46892 #undef TARGET_EXPAND_BUILTIN
46893 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46895 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46896 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46897 ix86_builtin_vectorized_function
46899 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46900 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46902 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46903 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46905 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46906 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46908 #undef TARGET_BUILTIN_RECIPROCAL
46909 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46911 #undef TARGET_ASM_FUNCTION_EPILOGUE
46912 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46914 #undef TARGET_ENCODE_SECTION_INFO
46915 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46916 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46917 #else
46918 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46919 #endif
46921 #undef TARGET_ASM_OPEN_PAREN
46922 #define TARGET_ASM_OPEN_PAREN ""
46923 #undef TARGET_ASM_CLOSE_PAREN
46924 #define TARGET_ASM_CLOSE_PAREN ""
46926 #undef TARGET_ASM_BYTE_OP
46927 #define TARGET_ASM_BYTE_OP ASM_BYTE
46929 #undef TARGET_ASM_ALIGNED_HI_OP
46930 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46931 #undef TARGET_ASM_ALIGNED_SI_OP
46932 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46933 #ifdef ASM_QUAD
46934 #undef TARGET_ASM_ALIGNED_DI_OP
46935 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46936 #endif
46938 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46939 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46941 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46942 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46944 #undef TARGET_ASM_UNALIGNED_HI_OP
46945 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46946 #undef TARGET_ASM_UNALIGNED_SI_OP
46947 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46948 #undef TARGET_ASM_UNALIGNED_DI_OP
46949 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46951 #undef TARGET_PRINT_OPERAND
46952 #define TARGET_PRINT_OPERAND ix86_print_operand
46953 #undef TARGET_PRINT_OPERAND_ADDRESS
46954 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46955 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46956 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46957 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46958 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46960 #undef TARGET_SCHED_INIT_GLOBAL
46961 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46962 #undef TARGET_SCHED_ADJUST_COST
46963 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46964 #undef TARGET_SCHED_ISSUE_RATE
46965 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46966 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46967 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46968 ia32_multipass_dfa_lookahead
46969 #undef TARGET_SCHED_MACRO_FUSION_P
46970 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46971 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46972 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46974 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46975 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46977 #undef TARGET_MEMMODEL_CHECK
46978 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46980 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46981 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46983 #ifdef HAVE_AS_TLS
46984 #undef TARGET_HAVE_TLS
46985 #define TARGET_HAVE_TLS true
46986 #endif
46987 #undef TARGET_CANNOT_FORCE_CONST_MEM
46988 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46989 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46990 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46992 #undef TARGET_DELEGITIMIZE_ADDRESS
46993 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46995 #undef TARGET_MS_BITFIELD_LAYOUT_P
46996 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46998 #if TARGET_MACHO
46999 #undef TARGET_BINDS_LOCAL_P
47000 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47001 #endif
47002 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47003 #undef TARGET_BINDS_LOCAL_P
47004 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47005 #endif
47007 #undef TARGET_ASM_OUTPUT_MI_THUNK
47008 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47009 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47010 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47012 #undef TARGET_ASM_FILE_START
47013 #define TARGET_ASM_FILE_START x86_file_start
47015 #undef TARGET_OPTION_OVERRIDE
47016 #define TARGET_OPTION_OVERRIDE ix86_option_override
47018 #undef TARGET_REGISTER_MOVE_COST
47019 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47020 #undef TARGET_MEMORY_MOVE_COST
47021 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47022 #undef TARGET_RTX_COSTS
47023 #define TARGET_RTX_COSTS ix86_rtx_costs
47024 #undef TARGET_ADDRESS_COST
47025 #define TARGET_ADDRESS_COST ix86_address_cost
47027 #undef TARGET_FIXED_CONDITION_CODE_REGS
47028 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47029 #undef TARGET_CC_MODES_COMPATIBLE
47030 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47032 #undef TARGET_MACHINE_DEPENDENT_REORG
47033 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47035 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47036 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47038 #undef TARGET_BUILD_BUILTIN_VA_LIST
47039 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47041 #undef TARGET_FOLD_BUILTIN
47042 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47044 #undef TARGET_COMPARE_VERSION_PRIORITY
47045 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47047 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47048 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47049 ix86_generate_version_dispatcher_body
47051 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47052 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47053 ix86_get_function_versions_dispatcher
47055 #undef TARGET_ENUM_VA_LIST_P
47056 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47058 #undef TARGET_FN_ABI_VA_LIST
47059 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47061 #undef TARGET_CANONICAL_VA_LIST_TYPE
47062 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47064 #undef TARGET_EXPAND_BUILTIN_VA_START
47065 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47067 #undef TARGET_MD_ASM_CLOBBERS
47068 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47070 #undef TARGET_PROMOTE_PROTOTYPES
47071 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47072 #undef TARGET_SETUP_INCOMING_VARARGS
47073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47074 #undef TARGET_MUST_PASS_IN_STACK
47075 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47076 #undef TARGET_FUNCTION_ARG_ADVANCE
47077 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47078 #undef TARGET_FUNCTION_ARG
47079 #define TARGET_FUNCTION_ARG ix86_function_arg
47080 #undef TARGET_FUNCTION_ARG_BOUNDARY
47081 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47082 #undef TARGET_PASS_BY_REFERENCE
47083 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47084 #undef TARGET_INTERNAL_ARG_POINTER
47085 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47086 #undef TARGET_UPDATE_STACK_BOUNDARY
47087 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47088 #undef TARGET_GET_DRAP_RTX
47089 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47090 #undef TARGET_STRICT_ARGUMENT_NAMING
47091 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47092 #undef TARGET_STATIC_CHAIN
47093 #define TARGET_STATIC_CHAIN ix86_static_chain
47094 #undef TARGET_TRAMPOLINE_INIT
47095 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47096 #undef TARGET_RETURN_POPS_ARGS
47097 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47099 #undef TARGET_LEGITIMATE_COMBINED_INSN
47100 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47102 #undef TARGET_ASAN_SHADOW_OFFSET
47103 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47105 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47106 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47108 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47109 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47111 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47112 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47114 #undef TARGET_C_MODE_FOR_SUFFIX
47115 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47117 #ifdef HAVE_AS_TLS
47118 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47119 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47120 #endif
47122 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47123 #undef TARGET_INSERT_ATTRIBUTES
47124 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47125 #endif
47127 #undef TARGET_MANGLE_TYPE
47128 #define TARGET_MANGLE_TYPE ix86_mangle_type
47130 #if !TARGET_MACHO
47131 #undef TARGET_STACK_PROTECT_FAIL
47132 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47133 #endif
47135 #undef TARGET_FUNCTION_VALUE
47136 #define TARGET_FUNCTION_VALUE ix86_function_value
47138 #undef TARGET_FUNCTION_VALUE_REGNO_P
47139 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47141 #undef TARGET_PROMOTE_FUNCTION_MODE
47142 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47144 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47145 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47147 #undef TARGET_INSTANTIATE_DECLS
47148 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47150 #undef TARGET_SECONDARY_RELOAD
47151 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47153 #undef TARGET_CLASS_MAX_NREGS
47154 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47156 #undef TARGET_PREFERRED_RELOAD_CLASS
47157 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47158 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47159 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47160 #undef TARGET_CLASS_LIKELY_SPILLED_P
47161 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47163 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47164 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47165 ix86_builtin_vectorization_cost
47166 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47167 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47168 ix86_vectorize_vec_perm_const_ok
47169 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47170 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47171 ix86_preferred_simd_mode
47172 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47173 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47174 ix86_autovectorize_vector_sizes
47175 #undef TARGET_VECTORIZE_INIT_COST
47176 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47177 #undef TARGET_VECTORIZE_ADD_STMT_COST
47178 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47179 #undef TARGET_VECTORIZE_FINISH_COST
47180 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47181 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47182 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47184 #undef TARGET_SET_CURRENT_FUNCTION
47185 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47187 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47188 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47190 #undef TARGET_OPTION_SAVE
47191 #define TARGET_OPTION_SAVE ix86_function_specific_save
47193 #undef TARGET_OPTION_RESTORE
47194 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47196 #undef TARGET_OPTION_PRINT
47197 #define TARGET_OPTION_PRINT ix86_function_specific_print
47199 #undef TARGET_OPTION_FUNCTION_VERSIONS
47200 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47202 #undef TARGET_CAN_INLINE_P
47203 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47205 #undef TARGET_EXPAND_TO_RTL_HOOK
47206 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47208 #undef TARGET_LEGITIMATE_ADDRESS_P
47209 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47211 #undef TARGET_LRA_P
47212 #define TARGET_LRA_P hook_bool_void_true
47214 #undef TARGET_REGISTER_PRIORITY
47215 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47217 #undef TARGET_REGISTER_USAGE_LEVELING_P
47218 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47220 #undef TARGET_LEGITIMATE_CONSTANT_P
47221 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47223 #undef TARGET_FRAME_POINTER_REQUIRED
47224 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47226 #undef TARGET_CAN_ELIMINATE
47227 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47229 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47230 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47232 #undef TARGET_ASM_CODE_END
47233 #define TARGET_ASM_CODE_END ix86_code_end
47235 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47236 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47238 #if TARGET_MACHO
47239 #undef TARGET_INIT_LIBFUNCS
47240 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47241 #endif
47243 #undef TARGET_LOOP_UNROLL_ADJUST
47244 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47246 #undef TARGET_SPILL_CLASS
47247 #define TARGET_SPILL_CLASS ix86_spill_class
47249 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47250 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47251 ix86_simd_clone_compute_vecsize_and_simdlen
47253 #undef TARGET_SIMD_CLONE_ADJUST
47254 #define TARGET_SIMD_CLONE_ADJUST \
47255 ix86_simd_clone_adjust
47257 #undef TARGET_SIMD_CLONE_USABLE
47258 #define TARGET_SIMD_CLONE_USABLE \
47259 ix86_simd_clone_usable
47261 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47262 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47263 ix86_float_exceptions_rounding_supported_p
47265 struct gcc_target targetm = TARGET_INITIALIZER;
47267 #include "gt-i386.h"